Esempio n. 1
0
def undertaker(worker_number=1, total_workers=1, chunk_size=5, once=False):
    """
    Main loop to select and delete dids.

    Repeatedly lists expired dids for this worker and deletes them in chunks
    until ``graceful_stop`` is set.

    :param worker_number: Index of this worker; passed to list_expired_dids and used in log messages.
    :param total_workers: Total number of workers; passed to list_expired_dids.
    :param chunk_size: Chunk size handed to chunks() for each delete_dids call.
    :param once: If True, run a single iteration and leave the loop.
    """
    logging.info('Undertaker(%s): starting', worker_number)
    logging.info('Undertaker(%s): started', worker_number)
    while not graceful_stop.is_set():
        try:
            dids = list_expired_dids(worker_number=worker_number, total_workers=total_workers, limit=10000)
            if not dids and not once:
                logging.info('Undertaker(%s): Nothing to do. sleep 60.', worker_number)
                time.sleep(60)
                continue

            for chunk in chunks(dids, chunk_size):
                try:
                    logging.info('Undertaker(%s): Receive %s dids to delete', worker_number, len(chunk))
                    delete_dids(dids=chunk, account='root')
                    logging.info('Undertaker(%s): Delete %s dids', worker_number, len(chunk))
                    record_counter(counters='undertaker.delete_dids', delta=len(chunk))
                except DatabaseException as error:
                    # A database failure on one chunk must not stop the
                    # remaining chunks from being processed.
                    logging.error('Undertaker(%s): Got database error %s.', worker_number, str(error))
        except Exception:
            # Keep the daemon alive on unexpected errors, but let
            # KeyboardInterrupt/SystemExit propagate.
            logging.error(traceback.format_exc())
            time.sleep(1)

        if once:
            break

    logging.info('Undertaker(%s): graceful stop requested', worker_number)
    logging.info('Undertaker(%s): graceful stop done', worker_number)
Esempio n. 2
0
 def test_delete_dids(self):
     """ DATA IDENTIFIERS (CORE): Delete dids """
     tmp_scope = 'mock'
     # range() works on both Python 2 and 3, whereas xrange() only exists on
     # Python 2; the loop index itself is not needed.
     dsns = [{'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'did_type': DIDType.DATASET} for _ in range(5)]
     # Register every dataset first, then remove them all in one call.
     for dsn in dsns:
         add_did(scope=tmp_scope, name=dsn['name'], type='DATASET', account='root')
     delete_dids(dids=dsns, account='root')
Esempio n. 3
0
def undertaker(worker_number=1, total_workers=1, chunk_size=5, once=False):
    """
    Main loop to select and delete dids.

    Each iteration refreshes the heartbeat through live(), lists the expired
    dids for the thread slot reported by that heartbeat, drops the dids that
    are currently paused and deletes the rest in chunks. When deleting a chunk
    fails with a database error whose message matches one of the lock
    markers, every did of that chunk is paused for a random 600-2400 seconds.

    :param worker_number: Worker index; only used in log messages.
    :param total_workers: Total number of workers; only used in log messages.
    :param chunk_size: Chunk size handed to chunks() for each delete_dids call.
    :param once: If True, run a single iteration and leave the loop.
    """
    logging.info('Undertaker(%s): starting', worker_number)
    logging.info('Undertaker(%s): started', worker_number)
    executable = 'undertaker'
    hostname = socket.gethostname()
    pid = os.getpid()
    thread = threading.current_thread()
    sanity_check(executable=executable, hostname=hostname)

    # Error-message markers that are treated as "locks detected".
    # NOTE(review): presumably the Oracle / PostgreSQL / MySQL "lock not
    # available" codes -- confirm against the database documentation.
    lock_patterns = ('.*ORA-00054.*', '.*55P03.*', '.*3572.*')

    paused_dids = {}  # {(scope, name): datetime}

    while not GRACEFUL_STOP.is_set():
        try:
            heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=thread, older_than=6000)
            logging.info('Undertaker(%s/%s): Live gives %s', worker_number, total_workers, heartbeat)

            # Refresh paused dids: collect the expired keys first so the dict
            # is never mutated while it is being iterated.
            now = datetime.utcnow()
            for key in [key for key, resume_at in paused_dids.items() if now > resume_at]:
                del paused_dids[key]

            dids = list_expired_dids(worker_number=heartbeat['assign_thread'], total_workers=heartbeat['nr_threads'], limit=10000)

            dids = [did for did in dids if (did['scope'], did['name']) not in paused_dids]

            if not dids and not once:
                logging.info('Undertaker(%s): Nothing to do. sleep 60.', worker_number)
                time.sleep(60)
                continue

            for chunk in chunks(dids, chunk_size):
                try:
                    logging.info('Undertaker(%s): Receive %s dids to delete', worker_number, len(chunk))
                    delete_dids(dids=chunk, account=InternalAccount('root'), expire_rules=True)
                    logging.info('Undertaker(%s): Delete %s dids', worker_number, len(chunk))
                    record_counter(counters='undertaker.delete_dids', delta=len(chunk))
                except RuleNotFound as error:
                    logging.error(error)
                except (DatabaseException, DatabaseError, UnsupportedOperation) as error:
                    # Exceptions raised without arguments have an empty
                    # ``args``; fall back to str(error) instead of failing
                    # with IndexError and aborting the remaining chunks.
                    detail = str(error.args[0]) if error.args else str(error)
                    if any(match(pattern, detail) for pattern in lock_patterns):
                        for did in chunk:
                            paused_dids[(did['scope'], did['name'])] = datetime.utcnow() + timedelta(seconds=randint(600, 2400))
                        record_counter('undertaker.delete_dids.exceptions.LocksDetected')
                        logging.warning('undertaker[%s/%s]: Locks detected for chunk', heartbeat['assign_thread'], heartbeat['nr_threads'])
                    else:
                        logging.error('Undertaker(%s): Got database error %s.', worker_number, str(error))
        except Exception:
            # Keep the daemon alive on unexpected errors, but let
            # KeyboardInterrupt/SystemExit propagate.
            logging.critical(traceback.format_exc())
            time.sleep(1)

        if once:
            break

    die(executable=executable, hostname=hostname, pid=pid, thread=thread)
    logging.info('Undertaker(%s): graceful stop requested', worker_number)
    logging.info('Undertaker(%s): graceful stop done', worker_number)
Esempio n. 4
0
def undertaker(worker_number=1, total_workers=1, chunk_size=5, once=False):
    """
    Main loop to select and delete dids.

    Refreshes the heartbeat on every iteration, lists the expired dids for the
    thread slot reported by the heartbeat and deletes them chunk by chunk.

    :param worker_number: Worker index; only used in log messages.
    :param total_workers: Total number of workers; only used in log messages.
    :param chunk_size: Chunk size handed to chunks() for each delete_dids call.
    :param once: If True, run a single iteration and leave the loop.
    """
    logging.info('Undertaker(%s): starting', worker_number)
    logging.info('Undertaker(%s): started', worker_number)

    daemon_name = 'rucio-undertaker'
    hostname = socket.gethostname()
    pid = os.getpid()
    thread = threading.current_thread()
    sanity_check(executable=daemon_name, hostname=hostname)

    while not GRACEFUL_STOP.is_set():
        try:
            heartbeat = live(executable=daemon_name, hostname=hostname, pid=pid, thread=thread, older_than=6000)
            log_fields = {'worker_number': worker_number,
                          'total_workers': total_workers,
                          'heartbeat': heartbeat}
            logging.info('Undertaker({0[worker_number]}/{0[total_workers]}): Live gives {0[heartbeat]}'.format(log_fields))

            expired = list_expired_dids(worker_number=heartbeat['assign_thread'] + 1,
                                        total_workers=heartbeat['nr_threads'],
                                        limit=10000)
            if not (expired or once):
                logging.info('Undertaker(%s): Nothing to do. sleep 60.', worker_number)
                time.sleep(60)
                continue

            for batch in chunks(expired, chunk_size):
                try:
                    logging.info('Undertaker(%s): Receive %s dids to delete', worker_number, len(batch))
                    delete_dids(dids=batch, account='root', expire_rules=True)
                    logging.info('Undertaker(%s): Delete %s dids', worker_number, len(batch))
                    record_counter(counters='undertaker.delete_dids', delta=len(batch))
                except RuleNotFound as rule_error:
                    logging.error(rule_error)
                except DatabaseException as db_error:
                    logging.error('Undertaker(%s): Got database error %s.', worker_number, str(db_error))
        except:
            # Any failure of the iteration is logged and retried after a
            # short pause.
            logging.critical(traceback.format_exc())
            time.sleep(1)

        if once:
            break

    die(executable=daemon_name, hostname=hostname, pid=pid, thread=thread)
    logging.info('Undertaker(%s): graceful stop requested', worker_number)
    logging.info('Undertaker(%s): graceful stop done', worker_number)
def testdid(vo):
    """
    Create a uniquely named DATASET did in the 'mock' scope, yield its name
    and scope, and delete it again (purging replicas) once the generator is
    resumed.

    NOTE(review): presumably used as a pytest fixture; the decorator is not
    visible here.

    :param vo: VO passed to InternalScope and InternalAccount.
    """
    name = 'testdid_%s' % generate_uuid()
    scope = InternalScope('mock', vo=vo)
    did_type = 'DATASET'
    owner = InternalAccount('root', vo=vo)

    # Setup
    add_did(scope=scope, name=name, type=did_type, account=owner)

    yield {'name': name, 'scope': scope}

    # Teardown
    cleanup_spec = {'name': name, 'scope': scope, 'did_type': did_type, 'purge_replicas': True}
    delete_dids(dids=[cleanup_spec], account=owner)
Esempio n. 6
0
 def test_delete_dids(self):
     """ DATA IDENTIFIERS (CORE): Delete dids """
     tmp_scope = 'mock'

     # Describe five uniquely named datasets before touching the catalogue.
     dsns = []
     for _ in range(5):
         dsns.append({
             'name': 'dsn_%s' % generate_uuid(),
             'scope': tmp_scope,
             'purge_replicas': False,
             'did_type': DIDType.DATASET,
         })

     # Register each of them, then delete the whole batch in a single call.
     for entry in dsns:
         add_did(scope=tmp_scope, name=entry['name'], type='DATASET', account='root')
     delete_dids(dids=dsns, account='root')
Esempio n. 7
0
def run_once(paused_dids: Dict[Tuple, datetime], chunk_size: int,
             heartbeat_handler: HeartbeatHandler, **_kwargs):
    """
    Run one undertaker iteration: list expired dids and delete them in chunks.

    Entries of ``paused_dids`` whose datetime has passed are removed in place,
    and dids still present in it are skipped. When deleting a chunk fails with
    a database error whose message matches one of the lock markers, every did
    of that chunk is added to ``paused_dids`` for a random 600-2400 seconds.

    :param paused_dids: Mapping {(scope, name): datetime}; mutated in place.
    :param chunk_size: Chunk size handed to chunks() for each delete_dids call.
    :param heartbeat_handler: Provides live(), which returns
                              (worker_number, total_workers, logger).
    :param _kwargs: Ignored.
    """
    worker_number, total_workers, logger = heartbeat_handler.live()

    # Error-message markers that are treated as "locks detected".
    # NOTE(review): presumably the Oracle / PostgreSQL / MySQL "lock not
    # available" codes -- confirm against the database documentation.
    lock_patterns = ('.*ORA-00054.*', '.*55P03.*', '.*3572.*')

    try:
        # Refresh paused dids: collect the expired keys first so the dict is
        # never mutated while it is being iterated.
        now = datetime.utcnow()
        expired_pauses = [key for key, resume_at in paused_dids.items() if now > resume_at]
        for key in expired_pauses:
            del paused_dids[key]

        dids = list_expired_dids(worker_number=worker_number,
                                 total_workers=total_workers,
                                 limit=10000)

        dids = [
            did for did in dids
            if (did['scope'], did['name']) not in paused_dids
        ]

        if not dids:
            logger(logging.INFO, 'did not get any work')
            return

        for chunk in chunks(dids, chunk_size):
            # live() is called again before every chunk and its logger is
            # used for this chunk's messages.
            _, _, logger = heartbeat_handler.live()
            try:
                logger(logging.INFO, 'Receive %s dids to delete', len(chunk))
                delete_dids(dids=chunk,
                            account=InternalAccount('root', vo='def'),
                            expire_rules=True)
                logger(logging.INFO, 'Delete %s dids', len(chunk))
                record_counter(name='undertaker.delete_dids', delta=len(chunk))
            except RuleNotFound as error:
                logger(logging.ERROR, error)
            except (DatabaseException, DatabaseError,
                    UnsupportedOperation) as error:
                # Exceptions raised without arguments have an empty ``args``;
                # fall back to str(error) instead of failing with IndexError
                # and aborting the remaining chunks.
                detail = str(error.args[0]) if error.args else str(error)
                if any(match(pattern, detail) for pattern in lock_patterns):
                    for did in chunk:
                        resume_at = datetime.utcnow() + timedelta(seconds=randint(600, 2400))
                        paused_dids[(did['scope'], did['name'])] = resume_at
                    record_counter(
                        'undertaker.delete_dids.exceptions.{exception}',
                        labels={'exception': 'LocksDetected'})
                    logger(logging.WARNING, 'Locks detected for chunk')
                else:
                    logger(logging.ERROR, 'Got database error %s.', str(error))
    except Exception:
        # Log unexpected errors instead of raising them to the caller, but
        # let KeyboardInterrupt/SystemExit propagate.
        logging.critical(traceback.format_exc())
Esempio n. 8
0
def dataset(db_session, vo):
    """
    Create a DATASET did in the 'mock' scope, yield its description and delete
    it (purging replicas) once the generator is resumed.

    The yielded dict is the same object that is later modified for the
    delete_dids call: 'account' is removed and 'purge_replicas' is added.

    NOTE(review): presumably used as a pytest fixture; the decorator is not
    visible here.

    :param db_session: Session passed to add_did and committed after both the
                       creation and the deletion.
    :param vo: VO passed to InternalScope and InternalAccount.
    """
    scope = InternalScope(scope='mock', vo=vo)
    name = generate_uuid()
    account = InternalAccount('root', vo=vo)

    # Setup
    did_spec = dict(scope=scope, name=name, did_type=DIDType.DATASET, account=account)
    add_did(session=db_session, **did_spec)
    db_session.commit()

    yield did_spec

    # Teardown: the account is passed separately to delete_dids.
    did_spec.pop('account')
    did_spec['purge_replicas'] = True
    delete_dids(dids=[did_spec], account=account)
    db_session.commit()
Esempio n. 9
0
def request_transfer(loop=1,
                     src=None,
                     dst=None,
                     upload=False,
                     same_src=False,
                     same_dst=False):
    """
    Main loop to request a new transfer.

    Every iteration creates a dataset in the 'mock' scope, optionally uploads
    the injector test file, registers a file replica on the source RSE,
    attaches the file to the dataset and adds a rule for the destination RSE.

    :param loop: Number of iterations after which the function returns.
    :param src: Passed to generate_rse to build the source RSE.
    :param dst: Passed to generate_rse to build the destination RSE.
    :param upload: If True, create the directory and upload the test file
                   through an 'srm' write protocol before adding the replica.
    :param same_src: If True, reuse the source RSE generated before the loop
                     instead of generating a new one per iteration.
    :param same_dst: Same as same_src, for the destination RSE.
    """

    def _random_rse(endpoint):
        # generate_rse() with a random 8-letter upper-case name.
        random_name = ''.join(random.sample(string.ascii_letters.upper(), 8))
        return generate_rse(endpoint, random_name)

    logging.info('request: starting')

    session = get_session()
    src_rse = _random_rse(src)
    dst_rse = _random_rse(dst)

    logging.info('request: started')

    iteration = 0
    while not graceful_stop.is_set():

        if iteration >= loop:
            return

        try:
            if not same_src:
                src_rse = _random_rse(src)
            if not same_dst:
                dst_rse = _random_rse(dst)

            tmp_name = generate_uuid()
            dataset_name = 'dataset-%s' % tmp_name
            file_name = 'file-%s' % tmp_name

            # add a new dataset
            scope = InternalScope('mock')
            account = InternalAccount('root')
            did.add_did(scope=scope, name=dataset_name, type=DIDType.DATASET,
                        account=account, session=session)

            # construct PFN
            pfns = rsemanager.lfns2pfns(src_rse, lfns=[{'scope': scope.external, 'name': file_name}])
            pfn = pfns['%s:file-%s' % (scope.external, tmp_name)]

            if upload:
                protocol = rsemanager.create_protocol(src_rse, operation='write', scheme='srm')
                protocol.connect()

                # create the directories if needed; failures are ignored
                try:
                    protocol.mkdir(pfn)
                except:
                    pass

                # upload the test file
                try:
                    source_dir = os.path.dirname(config_get('injector', 'file'))
                    source_file = os.path.basename(config_get('injector', 'file'))
                    protocol.put(source_file, pfn, source_dir=source_dir)
                except:
                    logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                    did.delete_dids([{'scope': scope, 'name': dataset_name}],
                                    account=account, session=session)
                    # leave the main loop entirely after a failed upload
                    break

            # add the replica
            replica.add_replica(rse_id=src_rse['id'],
                                scope=scope,
                                name=file_name,
                                bytes=config_get_int('injector', 'bytes'),
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account=account,
                                session=session)
            logging.info('added replica on %s for DID mock:%s' % (src_rse['rse'], tmp_name))

            # to the dataset
            attached_file = {'scope': scope,
                             'name': file_name,
                             'bytes': config_get('injector', 'bytes')}
            did.attach_dids(scope=scope, name=dataset_name, dids=[attached_file],
                            account=account, session=session)

            # add rule for the dataset
            rule.add_rule(dids=[{'scope': scope, 'name': dataset_name}],
                          account=account,
                          copies=1,
                          rse_expression=dst_rse['rse'],
                          grouping='ALL',
                          weight=None,
                          lifetime=None,
                          locked=False,
                          subscription_id=None,
                          activity='mock-injector',
                          session=session)
            logging.info('added rule for %s for DID %s:%s' % (dst_rse['rse'], scope, tmp_name))

            session.commit()
        except:
            # Undo the partial work of this iteration and keep looping.
            session.rollback()
            logging.critical(traceback.format_exc())

        iteration += 1

    logging.info('request: graceful stop requested')

    logging.info('request: graceful stop done')
Esempio n. 10
0
def test_add_and_delete_bad_replicas(rse_factory, mock_scope, root_account,
                                     did_client, vo):
    """ REPLICA (CORE): Add bad replicas and delete them"""
    # Adding replicas to deterministic RSE
    nbfiles = 5
    rse1, rse1_id = rse_factory.make_srm_rse(deterministic=True, vo=vo)

    files = []
    for _ in range(nbfiles):
        files.append({'scope': mock_scope,
                      'name': 'file_%s' % generate_uuid(),
                      'bytes': 1,
                      'adler32': '0cc737eb',
                      'meta': {'events': 10}})
    # Same files, described with the external scope for the client calls.
    client_files = [{'scope': entry['scope'].external, 'name': entry['name']} for entry in files]

    add_replicas(rse_id=rse1_id, files=files, account=root_account, ignore_availability=True)

    tmp_dsn = 'dataset_%s' % generate_uuid()
    did_client.add_dataset(scope=mock_scope.external, name=tmp_dsn)
    did_client.add_files_to_dataset(mock_scope.external, name=tmp_dsn, files=client_files, rse=rse1)

    # Declare replica bad
    file_dids = [{'scope': entry['scope'], 'name': entry['name'], 'type': DIDType.FILE} for entry in files]
    replicas = []
    for listed in list_replicas(dids=file_dids, schemes=['srm']):
        replicas.extend(listed['rses'][rse1_id])
    r = declare_bad_file_replicas(replicas, 'This is a good reason', root_account)
    assert r == {}

    # Check state of bad replicas: all files are BAD, none is DELETED yet
    list_bad_rep = []
    for bad in list_bad_replicas_status(state=BadFilesStatus.BAD, rse_id=rse1_id, vo=vo):
        list_bad_rep.append({'scope': bad['scope'].external, 'name': bad['name']})
    for rep in client_files:
        assert rep in list_bad_rep
    assert list(list_bad_replicas_status(state=BadFilesStatus.DELETED, rse_id=rse1_id, vo=vo)) == []

    # Now delete the dataset
    dataset_did = {'scope': mock_scope,
                   'name': tmp_dsn,
                   'did_type': DIDType.DATASET,
                   'purge_replicas': True}
    delete_dids([dataset_did], account=root_account)

    # After the deletion nothing is BAD any more and every file is DELETED
    assert list(list_bad_replicas_status(state=BadFilesStatus.BAD, rse_id=rse1_id, vo=vo)) == []
    list_deleted_rep = []
    for deleted in list_bad_replicas_status(state=BadFilesStatus.DELETED, rse_id=rse1_id, vo=vo):
        list_deleted_rep.append({'scope': deleted['scope'].external, 'name': deleted['name']})
    for rep in client_files:
        assert rep in list_deleted_rep
Esempio n. 11
0
def request_transfer(once=False, src=None, dst=None):
    """
    Main loop to request a new transfer.

    Registers two randomly named RSEs, one for ``src`` and one for ``dst``,
    each with a protocol derived from its endpoint string. Then, until
    ``graceful_stop`` is set, it creates a dataset in the 'mock' scope, uploads
    the injector test file to the source RSE, registers the replica, attaches
    the file to the dataset and adds a rule for the destination RSE.

    :param once: If True, return after the first iteration.
    :param src: Source endpoint, split on ':' into
                <scheme or space token>://<hostname>:<port>/<prefix>.
    :param dst: Destination endpoint, same layout as ``src``.
    :raises ValueError: If ``src`` or ``dst`` is not given.
    """
    # Fail before anything is registered; previously a missing endpoint only
    # surfaced as an AttributeError, possibly after RSEs had been added.
    if src is None or dst is None:
        raise ValueError('request_transfer requires both src and dst endpoints')

    logging.info('request: starting')

    site_a = 'RSE%s' % generate_uuid().upper()
    site_b = 'RSE%s' % generate_uuid().upper()

    # The protocol implementation is chosen from the source endpoint only.
    scheme = 'https'
    impl = 'rucio.rse.protocols.webdav.Default'
    srctoken = dsttoken = None
    if not src.startswith('https://'):
        scheme = 'srm'
        impl = 'rucio.rse.protocols.srm.Default'
        srctoken = src.split(':')[0]
        dsttoken = dst.split(':')[0]

    def _build_protocol(endpoint, space_token):
        # Build the protocol description for one endpoint string.
        parts = endpoint.split(':')
        port_and_path = parts[2].split('/')
        protocol = {
            'impl': impl,
            'scheme': scheme,
            'domains': {
                'lan': {'read': 1, 'write': 1, 'delete': 1},
                'wan': {'read': 1, 'write': 1, 'delete': 1}},
            'hostname': parts[1][2:],  # drop the leading '//'
            'port': port_and_path[0],
            'prefix': '/'.join([''] + port_and_path[1:]),
        }
        if scheme == 'srm':
            protocol['extended_attributes'] = {'space_token': space_token,
                                               'web_service_path': ''}
        return protocol

    rse.add_rse(site_a)
    rse.add_protocol(site_a, _build_protocol(src, srctoken))

    rse.add_rse(site_b)
    rse.add_protocol(site_b, _build_protocol(dst, dsttoken))

    si = rsemanager.get_rse_info(site_a)

    session = get_session()

    logging.info('request: started')

    while not graceful_stop.is_set():

        try:

            tmp_name = generate_uuid()

            # add a new dataset
            did.add_did(scope='mock', name='dataset-%s' % tmp_name,
                        type=DIDType.DATASET, account='root', session=session)

            # construct PFN
            pfn = rsemanager.lfns2pfns(si, lfns=[{'scope': 'mock', 'name': 'file-%s' % tmp_name}])['mock:file-%s' % tmp_name]

            # create the directories if needed
            p = rsemanager.create_protocol(si, operation='write', scheme=scheme)
            p.connect()
            try:
                p.mkdir(pfn)
            except:
                # best effort: a failing mkdir is deliberately ignored
                pass

            # upload the test file
            try:
                fp = os.path.dirname(config_get('injector', 'file'))
                fn = os.path.basename(config_get('injector', 'file'))
                p.put(fn, pfn, source_dir=fp)
            except:
                logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                did.delete_dids([{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}], account='root', session=session)
                # leave the main loop entirely after a failed upload
                break

            # add the replica
            replica.add_replica(rse=site_a, scope='mock', name='file-%s' % tmp_name,
                                bytes=config_get_int('injector', 'bytes'),
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account='root', session=session)

            # to the dataset
            did.attach_dids(scope='mock', name='dataset-%s' % tmp_name, dids=[{'scope': 'mock',
                                                                               'name': 'file-%s' % tmp_name,
                                                                               'bytes': config_get('injector', 'bytes')}],
                            account='root', session=session)

            # add rule for the dataset; only this call is timed
            ts = time.time()

            rule.add_rule(dids=[{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}],
                          account='root',
                          copies=1,
                          rse_expression=site_b,
                          grouping='ALL',
                          weight=None,
                          lifetime=None,
                          locked=False,
                          subscription_id=None,
                          activity='mock-injector',
                          session=session)

            logging.info('added rule for %s for DID mock:%s' % (site_b, tmp_name))
            record_timer('daemons.mock.conveyorinjector.add_rule', (time.time() - ts) * 1000)

            record_counter('daemons.mock.conveyorinjector.request_transfer')

            session.commit()
        except:
            # Undo the partial work of this iteration and keep looping.
            session.rollback()
            logging.critical(traceback.format_exc())

        if once:
            return

    logging.info('request: graceful stop requested')

    logging.info('request: graceful stop done')