def undertaker(worker_number=1, total_workers=1, chunk_size=5, once=False):
    """
    Main loop to select and delete dids.

    :param worker_number: Id of this worker, used to partition the expired dids.
    :param total_workers: Total number of workers running.
    :param chunk_size: Number of dids handed to delete_dids per call.
    :param once: If True, run a single iteration and return.
    """
    logging.info('Undertaker(%s): starting', worker_number)
    logging.info('Undertaker(%s): started', worker_number)
    while not graceful_stop.is_set():
        try:
            dids = list_expired_dids(worker_number=worker_number, total_workers=total_workers, limit=10000)
            if not dids and not once:
                # No expired dids for this worker: back off before polling again.
                logging.info('Undertaker(%s): Nothing to do. sleep 60.', worker_number)
                time.sleep(60)
                continue
            for chunk in chunks(dids, chunk_size):
                try:
                    logging.info('Undertaker(%s): Receive %s dids to delete', worker_number, len(chunk))
                    delete_dids(dids=chunk, account='root')
                    logging.info('Undertaker(%s): Delete %s dids', worker_number, len(chunk))
                    record_counter(counters='undertaker.delete_dids', delta=len(chunk))
                except DatabaseException as e:
                    # Fixed: original used the Python-2-only `except DatabaseException, e`
                    # syntax, which is a SyntaxError on Python 3.
                    # A failed chunk is logged and skipped; the loop continues.
                    logging.error('Undertaker(%s): Got database error %s.', worker_number, str(e))
        except Exception:
            # Catch-all boundary so a single unexpected failure does not kill
            # the daemon loop (was a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt).
            logging.error(traceback.format_exc())
        time.sleep(1)
        if once:
            break
    logging.info('Undertaker(%s): graceful stop requested', worker_number)
    logging.info('Undertaker(%s): graceful stop done', worker_number)
def test_delete_dids(self):
    """ DATA IDENTIFIERS (CORE): Delete dids """
    tmp_scope = 'mock'
    # Create five fresh datasets, then delete them all in a single call.
    dsns = []
    for _ in xrange(5):
        dataset = {'name': 'dsn_%s' % generate_uuid(),
                   'scope': tmp_scope,
                   'did_type': DIDType.DATASET}
        add_did(scope=tmp_scope, name=dataset['name'], type='DATASET', account='root')
        dsns.append(dataset)
    delete_dids(dids=dsns, account='root')
def undertaker(worker_number=1, total_workers=1, chunk_size=5, once=False):
    """
    Main loop to select and delete dids.

    Registers a heartbeat so concurrent undertakers can partition the work,
    and keeps a per-did pause map so dids that hit database lock errors are
    retried only after a randomized back-off.

    :param worker_number: Id of this worker (used in log messages).
    :param total_workers: Total number of workers (used in log messages).
    :param chunk_size: Number of dids handed to delete_dids per call.
    :param once: If True, run a single iteration and return.
    """
    logging.info('Undertaker(%s): starting', worker_number)
    logging.info('Undertaker(%s): started', worker_number)
    executable = 'undertaker'
    hostname = socket.gethostname()
    pid = os.getpid()
    thread = threading.current_thread()
    # Clean up stale heartbeat entries for this executable/host before starting.
    sanity_check(executable=executable, hostname=hostname)
    paused_dids = {}  # {(scope, name): datetime} — dids paused until the given time
    while not GRACEFUL_STOP.is_set():
        try:
            # Renew the heartbeat; the returned dict assigns this worker its
            # thread slot among all live undertaker threads.
            heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=thread, older_than=6000)
            logging.info('Undertaker({0[worker_number]}/{0[total_workers]}): Live gives {0[heartbeat]}'.format(locals()))
            # Refresh paused dids: drop entries whose back-off window has elapsed.
            # Iterate over a copy since we delete from paused_dids while looping.
            iter_paused_dids = deepcopy(paused_dids)
            for key in iter_paused_dids:
                if datetime.utcnow() > paused_dids[key]:
                    del paused_dids[key]
            dids = list_expired_dids(worker_number=heartbeat['assign_thread'], total_workers=heartbeat['nr_threads'], limit=10000)
            # Skip dids that are still paused after earlier lock conflicts.
            dids = [did for did in dids if (did['scope'], did['name']) not in paused_dids]
            if not dids and not once:
                logging.info('Undertaker(%s): Nothing to do. sleep 60.', worker_number)
                time.sleep(60)
                continue
            for chunk in chunks(dids, chunk_size):
                try:
                    logging.info('Undertaker(%s): Receive %s dids to delete', worker_number, len(chunk))
                    delete_dids(dids=chunk, account=InternalAccount('root'), expire_rules=True)
                    logging.info('Undertaker(%s): Delete %s dids', worker_number, len(chunk))
                    record_counter(counters='undertaker.delete_dids', delta=len(chunk))
                except RuleNotFound as error:
                    logging.error(error)
                except (DatabaseException, DatabaseError, UnsupportedOperation) as e:
                    # ORA-00054 / 55P03 / 3572 in the error text indicate row locks
                    # held by a concurrent transaction (Oracle/PostgreSQL/MySQL
                    # codes, presumably — confirm against DB docs): pause the whole
                    # chunk for a randomized 10-40 minute back-off instead of
                    # treating it as a hard failure.
                    if match('.*ORA-00054.*', str(e.args[0])) or match('.*55P03.*', str(e.args[0])) or match('.*3572.*', str(e.args[0])):
                        for did in chunk:
                            paused_dids[(did['scope'], did['name'])] = datetime.utcnow() + timedelta(seconds=randint(600, 2400))
                        record_counter('undertaker.delete_dids.exceptions.LocksDetected')
                        logging.warning('undertaker[%s/%s]: Locks detected for chunk', heartbeat['assign_thread'], heartbeat['nr_threads'])
                    else:
                        logging.error('Undertaker(%s): Got database error %s.', worker_number, str(e))
        except:
            # Catch-all so a single unexpected failure does not kill the daemon loop.
            logging.critical(traceback.format_exc())
        time.sleep(1)
        if once:
            break
    # Remove this worker's heartbeat entry on shutdown.
    die(executable=executable, hostname=hostname, pid=pid, thread=thread)
    logging.info('Undertaker(%s): graceful stop requested', worker_number)
    logging.info('Undertaker(%s): graceful stop done', worker_number)
def undertaker(worker_number=1, total_workers=1, chunk_size=5, once=False):
    """
    Main loop to select and delete dids.

    Registers a heartbeat so concurrent undertakers can partition the
    expired-did workload among themselves.

    :param worker_number: Id of this worker (used in log messages).
    :param total_workers: Total number of workers (used in log messages).
    :param chunk_size: Number of dids handed to delete_dids per call.
    :param once: If True, run a single iteration and return.
    """
    logging.info('Undertaker(%s): starting', worker_number)
    logging.info('Undertaker(%s): started', worker_number)
    hostname = socket.gethostname()
    pid = os.getpid()
    thread = threading.current_thread()
    # Clean up stale heartbeat entries for this executable/host before starting.
    sanity_check(executable='rucio-undertaker', hostname=hostname)
    while not GRACEFUL_STOP.is_set():
        try:
            # Renew the heartbeat; the returned dict assigns this worker its
            # thread slot among all live undertaker threads.
            heartbeat = live(executable='rucio-undertaker', hostname=hostname, pid=pid, thread=thread, older_than=6000)
            logging.info(
                'Undertaker({0[worker_number]}/{0[total_workers]}): Live gives {0[heartbeat]}'
                .format(locals()))
            # NOTE(review): assign_thread is offset by +1 here, unlike other
            # versions of this daemon — presumably list_expired_dids expects
            # 1-based worker numbers in this revision; confirm against its API.
            dids = list_expired_dids(worker_number=heartbeat['assign_thread'] + 1,
                                     total_workers=heartbeat['nr_threads'],
                                     limit=10000)
            if not dids and not once:
                # No expired dids for this worker: back off before polling again.
                logging.info('Undertaker(%s): Nothing to do. sleep 60.', worker_number)
                time.sleep(60)
                continue
            for chunk in chunks(dids, chunk_size):
                try:
                    logging.info('Undertaker(%s): Receive %s dids to delete', worker_number, len(chunk))
                    delete_dids(dids=chunk, account='root', expire_rules=True)
                    logging.info('Undertaker(%s): Delete %s dids', worker_number, len(chunk))
                    record_counter(counters='undertaker.delete_dids', delta=len(chunk))
                except RuleNotFound as error:
                    logging.error(error)
                except DatabaseException as error:
                    # A failed chunk is logged and skipped; the loop continues.
                    logging.error('Undertaker(%s): Got database error %s.', worker_number, str(error))
        except:
            # Catch-all so a single unexpected failure does not kill the daemon loop.
            logging.critical(traceback.format_exc())
        time.sleep(1)
        if once:
            break
    # Remove this worker's heartbeat entry on shutdown.
    die(executable='rucio-undertaker', hostname=hostname, pid=pid, thread=thread)
    logging.info('Undertaker(%s): graceful stop requested', worker_number)
    logging.info('Undertaker(%s): graceful stop done', worker_number)
def testdid(vo):
    """
    Yield a freshly created DATASET did (as {'name', 'scope'}), deleting it
    with purged replicas once the consumer is done.
    """
    kind = 'DATASET'
    scope = InternalScope('mock', vo=vo)
    owner = InternalAccount('root', vo=vo)
    name = 'testdid_%s' % generate_uuid()

    add_did(scope=scope, name=name, type=kind, account=owner)
    yield {'name': name, 'scope': scope}

    # Teardown: remove the did and purge its replicas.
    cleanup_spec = {'name': name, 'scope': scope, 'did_type': kind, 'purge_replicas': True}
    delete_dids(dids=[cleanup_spec], account=owner)
def test_delete_dids(self):
    """ DATA IDENTIFIERS (CORE): Delete dids """
    tmp_scope = 'mock'
    # Create five fresh datasets, then delete them all in a single call.
    dsns = []
    for _ in range(5):
        dataset = {'name': 'dsn_%s' % generate_uuid(),
                   'scope': tmp_scope,
                   'purge_replicas': False,
                   'did_type': DIDType.DATASET}
        add_did(scope=tmp_scope, name=dataset['name'], type='DATASET', account='root')
        dsns.append(dataset)
    delete_dids(dids=dsns, account='root')
def run_once(paused_dids: Dict[Tuple, datetime], chunk_size: int, heartbeat_handler: HeartbeatHandler, **_kwargs):
    """
    Perform one undertaker iteration: pick up expired dids and delete them.

    :param paused_dids: Mutable map {(scope, name): datetime}; dids whose deletion
                        hit database lock errors are paused here until the
                        stored timestamp passes.
    :param chunk_size: Number of dids handed to delete_dids per call.
    :param heartbeat_handler: Supplies (worker_number, total_workers, logger)
                              via live(), partitioning work among workers.
    """
    worker_number, total_workers, logger = heartbeat_handler.live()
    try:
        # Drop pause entries whose back-off window has elapsed. Iterate over a
        # copy because we delete from paused_dids while looping.
        iter_paused_dids = deepcopy(paused_dids)
        for key in iter_paused_dids:
            if datetime.utcnow() > paused_dids[key]:
                del paused_dids[key]

        dids = list_expired_dids(worker_number=worker_number, total_workers=total_workers, limit=10000)
        # Skip dids that are still paused after earlier lock conflicts.
        dids = [did for did in dids if (did['scope'], did['name']) not in paused_dids]

        if not dids:
            logger(logging.INFO, 'did not get any work')
            return

        for chunk in chunks(dids, chunk_size):
            # Renew the heartbeat (and refresh the logger) per chunk of work.
            _, _, logger = heartbeat_handler.live()
            try:
                logger(logging.INFO, 'Receive %s dids to delete', len(chunk))
                delete_dids(dids=chunk, account=InternalAccount('root', vo='def'), expire_rules=True)
                logger(logging.INFO, 'Delete %s dids', len(chunk))
                record_counter(name='undertaker.delete_dids', delta=len(chunk))
            except RuleNotFound as error:
                logger(logging.ERROR, error)
            except (DatabaseException, DatabaseError, UnsupportedOperation) as e:
                # ORA-00054 / 55P03 / 3572 in the error text indicate row locks
                # held by a concurrent transaction: pause the whole chunk for a
                # randomized 10-40 minute back-off instead of failing hard.
                if match('.*ORA-00054.*', str(e.args[0])) or match('.*55P03.*', str(e.args[0])) or match('.*3572.*', str(e.args[0])):
                    for did in chunk:
                        paused_dids[(did['scope'], did['name'])] = datetime.utcnow() + timedelta(seconds=randint(600, 2400))
                    record_counter('undertaker.delete_dids.exceptions.{exception}', labels={'exception': 'LocksDetected'})
                    logger(logging.WARNING, 'Locks detected for chunk')
                else:
                    logger(logging.ERROR, 'Got database error %s.', str(e))
    except Exception:
        # Fixed: was a bare `except:` (which also swallowed SystemExit /
        # KeyboardInterrupt) calling module-level logging.critical, bypassing
        # the per-worker logger used everywhere else in this function.
        logger(logging.CRITICAL, traceback.format_exc())
def dataset(db_session, vo):
    """
    Yield the creation kwargs of a freshly added DATASET did; on teardown the
    did is deleted with purged replicas and the session committed.
    """
    account = InternalAccount('root', vo=vo)
    did_spec = {
        'scope': InternalScope(scope='mock', vo=vo),
        'name': generate_uuid(),
        'did_type': DIDType.DATASET,
        'account': account,
    }
    add_did(session=db_session, **did_spec)
    db_session.commit()

    yield did_spec

    # Teardown: reshape the same dict into a delete_dids entry
    # (drop the account key, request replica purging) and clean up.
    del did_spec['account']
    did_spec['purge_replicas'] = True
    delete_dids(dids=[did_spec], account=account)
    db_session.commit()
def request_transfer(loop=1, src=None, dst=None, upload=False, same_src=False, same_dst=False):
    """
    Main loop to request a new transfer.

    Each iteration creates a dataset with one file replica on a source RSE
    and adds a replication rule towards a destination RSE, which triggers a
    transfer request.

    :param loop: Number of iterations to run before returning.
    :param src: Source endpoint passed to generate_rse.
    :param dst: Destination endpoint passed to generate_rse.
    :param upload: If True, physically upload the configured test file.
    :param same_src: If True, reuse the same source RSE for every iteration.
    :param same_dst: If True, reuse the same destination RSE for every iteration.
    """
    logging.info('request: starting')
    session = get_session()
    # Create initial source/destination RSEs with random 8-letter suffixes.
    src_rse = generate_rse(
        src, ''.join(random.sample(string.ascii_letters.upper(), 8)))
    dst_rse = generate_rse(
        dst, ''.join(random.sample(string.ascii_letters.upper(), 8)))
    logging.info('request: started')
    i = 0
    while not graceful_stop.is_set():
        if i >= loop:
            return
        try:
            # Unless pinned, make a fresh RSE pair for each iteration.
            if not same_src:
                src_rse = generate_rse(
                    src, ''.join(random.sample(string.ascii_letters.upper(), 8)))
            if not same_dst:
                dst_rse = generate_rse(
                    dst, ''.join(random.sample(string.ascii_letters.upper(), 8)))
            tmp_name = generate_uuid()
            # add a new dataset
            scope = InternalScope('mock')
            account = InternalAccount('root')
            did.add_did(scope=scope, name='dataset-%s' % tmp_name,
                        type=DIDType.DATASET, account=account, session=session)
            # construct PFN for the (future) file replica on the source RSE
            pfn = rsemanager.lfns2pfns(src_rse, lfns=[{
                'scope': scope.external,
                'name': 'file-%s' % tmp_name
            }])['%s:file-%s' % (scope.external, tmp_name)]
            if upload:
                # create the directories if needed
                p = rsemanager.create_protocol(src_rse, operation='write', scheme='srm')
                p.connect()
                try:
                    p.mkdir(pfn)
                except:
                    # Best-effort: the directory may already exist.
                    pass
                # upload the test file configured in the 'injector' section
                try:
                    fp = os.path.dirname(config_get('injector', 'file'))
                    fn = os.path.basename(config_get('injector', 'file'))
                    p.put(fn, pfn, source_dir=fp)
                except:
                    # Upload failed: remove the dataset created above and stop.
                    logging.critical(
                        'Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                    did.delete_dids([{
                        'scope': scope,
                        'name': 'dataset-%s' % tmp_name
                    }], account=account, session=session)
                    break
            # add the replica (catalogue entry; sizes/checksums come from config)
            replica.add_replica(rse_id=src_rse['id'], scope=scope,
                                name='file-%s' % tmp_name,
                                bytes=config_get_int('injector', 'bytes'),
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account=account, session=session)
            logging.info('added replica on %s for DID mock:%s' % (src_rse['rse'], tmp_name))
            # attach the file to the dataset
            did.attach_dids(scope=scope, name='dataset-%s' % tmp_name,
                            dids=[{
                                'scope': scope,
                                'name': 'file-%s' % tmp_name,
                                'bytes': config_get('injector', 'bytes')
                            }], account=account, session=session)
            # add rule for the dataset — this is what requests the transfer
            rule.add_rule(dids=[{
                'scope': scope,
                'name': 'dataset-%s' % tmp_name
            }], account=account, copies=1, rse_expression=dst_rse['rse'],
                grouping='ALL', weight=None, lifetime=None, locked=False,
                subscription_id=None, activity='mock-injector', session=session)
            logging.info('added rule for %s for DID %s:%s' % (dst_rse['rse'], scope, tmp_name))
            session.commit()
        except:
            # Roll back the whole iteration on any failure and keep looping.
            session.rollback()
            logging.critical(traceback.format_exc())
        i += 1
    logging.info('request: graceful stop requested')
    logging.info('request: graceful stop done')
def test_add_and_delete_bad_replicas(rse_factory, mock_scope, root_account, did_client, vo): """ REPLICA (CORE): Add bad replicas and delete them""" # Adding replicas to deterministic RSE nbfiles = 5 rse1, rse1_id = rse_factory.make_srm_rse(deterministic=True, vo=vo) files = [{ 'scope': mock_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': { 'events': 10 } } for _ in range(nbfiles)] client_files = [{ 'scope': file_['scope'].external, 'name': file_['name'] } for file_ in files] add_replicas(rse_id=rse1_id, files=files, account=root_account, ignore_availability=True) tmp_dsn = 'dataset_%s' % generate_uuid() did_client.add_dataset(scope=mock_scope.external, name=tmp_dsn) did_client.add_files_to_dataset(mock_scope.external, name=tmp_dsn, files=client_files, rse=rse1) # Declare replica bad replicas = [] for replica in list_replicas(dids=[{ 'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE } for f in files], schemes=['srm']): replicas.extend(replica['rses'][rse1_id]) r = declare_bad_file_replicas(replicas, 'This is a good reason', root_account) assert r == {} # Check state of bad replicas list_bad_rep = [{ 'scope': rep['scope'].external, 'name': rep['name'] } for rep in list_bad_replicas_status( state=BadFilesStatus.BAD, rse_id=rse1_id, vo=vo)] for rep in client_files: assert rep in list_bad_rep assert [ rep for rep in list_bad_replicas_status( state=BadFilesStatus.DELETED, rse_id=rse1_id, vo=vo) ] == [] # Now delete the dataset delete_dids([{ 'scope': mock_scope, 'name': tmp_dsn, 'did_type': DIDType.DATASET, 'purge_replicas': True }], account=root_account) assert [ rep for rep in list_bad_replicas_status( state=BadFilesStatus.BAD, rse_id=rse1_id, vo=vo) ] == [] list_deleted_rep = [{ 'scope': rep['scope'].external, 'name': rep['name'] } for rep in list_bad_replicas_status( state=BadFilesStatus.DELETED, rse_id=rse1_id, vo=vo)] for rep in client_files: assert rep in list_deleted_rep
def request_transfer(once=False, src=None, dst=None):
    """
    Main loop to request a new transfer.

    Builds two throwaway RSEs from the src/dst endpoint strings, then each
    iteration uploads a test file to the source RSE, registers it in a new
    dataset, and adds a rule towards the destination RSE to trigger a
    transfer request.

    :param once: If True, return after a single iteration.
    :param src: Source endpoint URL ('scheme:...:port/path' form).
    :param dst: Destination endpoint URL ('scheme:...:port/path' form).
    """
    logging.info('request: starting')
    site_a = 'RSE%s' % generate_uuid().upper()
    site_b = 'RSE%s' % generate_uuid().upper()
    # Default to webdav; fall back to SRM (with space tokens) for non-https sources.
    scheme = 'https'
    impl = 'rucio.rse.protocols.webdav.Default'
    if not src.startswith('https://'):
        scheme = 'srm'
        impl = 'rucio.rse.protocols.srm.Default'
        # Space tokens are the leading 'token:' component of the endpoint strings.
        srctoken = src.split(':')[0]
        dsttoken = dst.split(':')[0]
    # Protocol template: enabled for read/write/delete on both LAN and WAN.
    tmp_proto = {
        'impl': impl,
        'scheme': scheme,
        'domains': {
            'lan': {'read': 1, 'write': 1, 'delete': 1},
            'wan': {'read': 1, 'write': 1, 'delete': 1}}}
    # Register the source RSE and its protocol, parsed from the src URL.
    rse.add_rse(site_a)
    tmp_proto['hostname'] = src.split(':')[1][2:]
    tmp_proto['port'] = src.split(':')[2].split('/')[0]
    tmp_proto['prefix'] = '/'.join([''] + src.split(':')[2].split('/')[1:])
    if scheme == 'srm':
        tmp_proto['extended_attributes'] = {'space_token': srctoken,
                                            'web_service_path': ''}
    rse.add_protocol(site_a, tmp_proto)
    # Same for the destination RSE, parsed from the dst URL.
    tmp_proto = {
        'impl': impl,
        'scheme': scheme,
        'domains': {
            'lan': {'read': 1, 'write': 1, 'delete': 1},
            'wan': {'read': 1, 'write': 1, 'delete': 1}}}
    rse.add_rse(site_b)
    tmp_proto['hostname'] = dst.split(':')[1][2:]
    tmp_proto['port'] = dst.split(':')[2].split('/')[0]
    tmp_proto['prefix'] = '/'.join([''] + dst.split(':')[2].split('/')[1:])
    if scheme == 'srm':
        tmp_proto['extended_attributes'] = {'space_token': dsttoken,
                                            'web_service_path': ''}
    rse.add_protocol(site_b, tmp_proto)
    si = rsemanager.get_rse_info(site_a)
    session = get_session()
    logging.info('request: started')
    while not graceful_stop.is_set():
        try:
            ts = time.time()
            tmp_name = generate_uuid()
            # add a new dataset
            did.add_did(scope='mock', name='dataset-%s' % tmp_name,
                        type=DIDType.DATASET, account='root', session=session)
            # construct PFN for the file replica on the source RSE
            pfn = rsemanager.lfns2pfns(si, lfns=[{'scope': 'mock', 'name': 'file-%s' % tmp_name}])['mock:file-%s' % tmp_name]
            # create the directories if needed
            p = rsemanager.create_protocol(si, operation='write', scheme=scheme)
            p.connect()
            try:
                p.mkdir(pfn)
            except:
                # Best-effort: the directory may already exist.
                pass
            # upload the test file configured in the 'injector' section
            try:
                fp = os.path.dirname(config_get('injector', 'file'))
                fn = os.path.basename(config_get('injector', 'file'))
                p.put(fn, pfn, source_dir=fp)
            except:
                # Upload failed: remove the dataset created above and stop.
                logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                did.delete_dids([{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}], account='root', session=session)
                break
            # add the replica (catalogue entry; sizes/checksums come from config)
            replica.add_replica(rse=site_a, scope='mock',
                                name='file-%s' % tmp_name,
                                bytes=config_get_int('injector', 'bytes'),
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account='root', session=session)
            # attach the file to the dataset
            did.attach_dids(scope='mock', name='dataset-%s' % tmp_name,
                            dids=[{'scope': 'mock',
                                   'name': 'file-%s' % tmp_name,
                                   'bytes': config_get('injector', 'bytes')}],
                            account='root', session=session)
            # add rule for the dataset — this is what requests the transfer
            ts = time.time()
            rule.add_rule(dids=[{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}],
                          account='root', copies=1, rse_expression=site_b,
                          grouping='ALL', weight=None, lifetime=None,
                          locked=False, subscription_id=None,
                          activity='mock-injector', session=session)
            logging.info('added rule for %s for DID mock:%s' % (site_b, tmp_name))
            record_timer('daemons.mock.conveyorinjector.add_rule', (time.time()-ts)*1000)
            record_counter('daemons.mock.conveyorinjector.request_transfer')
            session.commit()
        except:
            # Roll back the whole iteration on any failure and keep looping.
            session.rollback()
            logging.critical(traceback.format_exc())
        if once:
            return
    logging.info('request: graceful stop requested')
    logging.info('request: graceful stop done')