def test_clean_and_get_collection_replica_updates(self):
    """ REPLICA (CORE): Get cleaned update requests for collection replicas. """
    ds_with_replica = 'dataset_with_rse%s' % generate_uuid()
    ds_without_replica = 'dataset_without_rse%s' % generate_uuid()
    add_dids(dids=[{'name': ds_without_replica, 'scope': self.scope, 'type': constants.DIDType.DATASET},
                   {'name': ds_with_replica, 'scope': self.scope, 'type': constants.DIDType.DATASET}],
             account=self.account, session=self.db_session)

    # Start from a clean slate of update requests
    self.db_session.query(models.UpdatedCollectionReplica).delete()  # pylint: disable=no-member
    self.db_session.commit()  # pylint: disable=no-member

    # setup test data - 4 without corresponding replica, 4 duplicates and 2 correct
    models.CollectionReplica(rse_id=self.rse_id, scope=self.scope, bytes=10, length=0,
                             available_replicas_cnt=0, state=constants.ReplicaState.AVAILABLE,
                             name=ds_with_replica,
                             did_type=constants.DIDType.DATASET).save(session=self.db_session)
    # (dataset name, rse_id) for every UpdatedCollectionReplica row to insert,
    # in the same order as the original hand-written saves; rse_id=None rows
    # omit the keyword entirely to match the original constructor calls.
    update_rows = ([(ds_with_replica, self.rse_id)] * 3
                   + [(ds_without_replica, self.rse_id)] * 2
                   + [(ds_without_replica, None)] * 2
                   + [(ds_with_replica, None)] * 3)
    for name, rse_id in update_rows:
        extra = {'rse_id': rse_id} if rse_id is not None else {}
        models.UpdatedCollectionReplica(scope=self.scope, name=name,
                                        did_type=constants.DIDType.DATASET,
                                        **extra).save(session=self.db_session)

    cleaned = get_cleaned_updated_collection_replicas(total_workers=0, worker_number=0,
                                                      session=self.db_session)
    assert len(cleaned) == 2
    for update_request in cleaned:
        row = self.db_session.query(models.UpdatedCollectionReplica).filter_by(id=update_request['id']).one()  # pylint: disable=no-member
        assert row.scope == self.scope
        assert row.name in (ds_with_replica, ds_without_replica)
def run_once(heartbeat_handler, limit, **_kwargs):
    """Process one batch of updated collection replicas for this worker.

    Returns True when the caller should put the worker to sleep (no work,
    or the batch came back shorter than *limit*), False otherwise.
    """
    worker_number, total_workers, logger = heartbeat_handler.live()

    # Fetch the batch of collection replicas assigned to this worker
    query_start = time.time()
    replicas = get_cleaned_updated_collection_replicas(total_workers=total_workers - 1,
                                                       worker_number=worker_number,
                                                       limit=limit)
    logger(logging.DEBUG, 'Index query time %f size=%d' % (time.time() - query_start, len(replicas)))

    # No work available: send the worker to sleep
    if not replicas:
        logger(logging.INFO, 'did not get any work')
        return True

    for replica in replicas:
        worker_number, total_workers, logger = heartbeat_handler.live()
        if graceful_stop.is_set():
            break
        update_start = time.time()
        update_collection_replica(replica)
        logger(logging.DEBUG, 'update of collection replica "%s" took %f' % (replica['id'], time.time() - update_start))

    # Sleep only when a limit was given and the batch came back short
    return bool(limit and len(replicas) < limit)
def test_abacus_collection_replica_cleanup(self):
    """ ABACUS (COLLECTION REPLICA): Test if the cleanup procedure works correctly. """
    collection_replica.run(once=True)
    db_session = session.get_session()

    # Two fresh RSEs plus one dataset to attach collection replicas to
    rse1 = rse_name_generator()
    rse_id1 = add_rse(rse1, **self.vo)
    rse2 = rse_name_generator()
    rse_id2 = add_rse(rse2, **self.vo)

    scope = InternalScope('mock', **self.vo)
    dataset = 'dataset_%s' % generate_uuid()
    jdoe = InternalAccount('jdoe', **self.vo)
    add_did(scope, dataset, DIDType.DATASET, jdoe)

    # One collection replica per RSE ...
    for rse_id in (rse_id1, rse_id2):
        models.CollectionReplica(scope=scope, name=dataset, rse_id=rse_id,
                                 state=ReplicaState.AVAILABLE,
                                 bytes=1).save(session=db_session, flush=False)
    # ... plus duplicated update requests (two per RSE) and one without an RSE
    for rse_id in (rse_id1, rse_id1, rse_id2, rse_id2, None):
        models.UpdatedCollectionReplica(scope=scope, name=dataset, rse_id=rse_id,
                                        did_type=DIDType.DATASET).save(session=db_session, flush=False)
    db_session.commit()

    # Duplicates collapse: one request per RSE plus the RSE-less one remain
    assert len(get_cleaned_updated_collection_replicas(1, 1)) == 3
    self.did_client.set_metadata(scope.external, dataset, 'lifetime', -1)
def collection_replica_update(once=False, limit=1000):
    """
    Main loop to check and update the collection replicas.

    :param once: run a single iteration and exit instead of looping forever.
    :param limit: maximum number of updated-collection-replica requests to
                  process per iteration; a short batch triggers a 10s sleep.
    """
    logging.info('collection_replica_update: starting')

    # Make an initial heartbeat so that all abacus-collection-replica daemons
    # have the correct worker number on the next try
    executable = 'abacus-collection-replica'
    hostname = socket.gethostname()
    pid = os.getpid()
    current_thread = threading.current_thread()
    live(executable=executable, hostname=hostname, pid=pid, thread=current_thread)

    # Fixed copy-paste duplicate: 'started' now logged once, after the
    # initial heartbeat, instead of immediately after 'starting'.
    logging.info('collection_replica_update: started')

    while not graceful_stop.is_set():
        try:
            # Heartbeat
            heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=current_thread)

            # Select a bunch of collection replicas to update for this worker
            start = time.time()
            replicas = get_cleaned_updated_collection_replicas(total_workers=heartbeat['nr_threads'] - 1,
                                                               worker_number=heartbeat['assign_thread'],
                                                               limit=limit)
            # Lazy %-args: the message is only formatted when DEBUG is enabled
            logging.debug('Index query time %f size=%d', time.time() - start, len(replicas))

            # If the list is empty, send the worker to sleep
            if not replicas and not once:
                logging.info('collection_replica_update[%s/%s] did not get any work',
                             heartbeat['assign_thread'], heartbeat['nr_threads'] - 1)
                time.sleep(10)
            else:
                for replica in replicas:
                    if graceful_stop.is_set():
                        break
                    start_time = time.time()
                    update_collection_replica(replica)
                    logging.debug('collection_replica_update[%s/%s]: update of collection replica "%s" took %f',
                                  heartbeat['assign_thread'], heartbeat['nr_threads'] - 1,
                                  replica['id'], time.time() - start_time)
                # A short batch means the backlog is drained; back off briefly
                if limit and len(replicas) < limit and not once:
                    time.sleep(10)
        except Exception:
            # Keep the daemon alive on unexpected errors; log the full traceback
            logging.error(traceback.format_exc())
        if once:
            break

    logging.info('collection_replica_update: graceful stop requested')
    die(executable=executable, hostname=hostname, pid=pid, thread=current_thread)
    logging.info('collection_replica_update: graceful stop done')