def run_once(heartbeat_handler, limit, **_kwargs):
    """
    Run one pass over the pending collection replica update requests.

    :param heartbeat_handler: Object whose ``live()`` call yields a
                              ``(worker_number, total_workers, logger)`` triple.
    :param limit: Passed through as ``limit`` when fetching the batch; also
                  compared against the batch size to decide on sleeping.
    :param _kwargs: Additional keyword arguments; accepted and ignored.
    :returns: The "must sleep" flag: True when no work was fetched or when
              fewer than ``limit`` requests were fetched, otherwise False.
    """
    worker_number, total_workers, logger = heartbeat_handler.live()

    # Fetch the batch of update requests assigned to this worker.
    query_started = time.time()
    pending = get_cleaned_updated_collection_replicas(
        total_workers=total_workers - 1,
        worker_number=worker_number,
        limit=limit,
    )
    logger(
        logging.DEBUG,
        'Index query time %f size=%d' % (time.time() - query_started, len(pending)),
    )

    # An empty batch means there is nothing to do: ask the caller to sleep.
    if not pending:
        logger(logging.INFO, 'did not get any work')
        return True

    for request in pending:
        # Renew the heartbeat before every item; this also rebinds the logger.
        worker_number, total_workers, logger = heartbeat_handler.live()
        if graceful_stop.is_set():
            break

        item_started = time.time()
        update_collection_replica(request)
        logger(
            logging.DEBUG,
            'update of collection replica "%s" took %f' % (request['id'], time.time() - item_started),
        )

    # A batch smaller than a (truthy) limit means the backlog is drained.
    return bool(limit and len(pending) < limit)
def test_update_collection_replica(self):
    """
    REPLICA (CORE): Update collection replicas from update requests.

    Two scenarios are exercised in sequence on the same dataset:
    update requests that carry an ``rse_id`` (a single dataset replica), and
    update requests without an ``rse_id`` (dataset replicas on two RSEs).
    After every processed request the counters and the state of the dataset
    replica(s) are checked.
    """
    file_size = 2
    files = [
        {'name': 'file_%s' % generate_uuid(), 'scope': self.scope, 'bytes': file_size}
        for _ in range(2)
    ]
    dataset_name = 'dataset_test_%s' % generate_uuid()

    # ------------------------------------------------------------------
    # Local helpers wrapping the query/assert stanzas repeated below.
    # ------------------------------------------------------------------
    def queue_rse_update():
        # Store an update request bound to the first RSE.
        models.UpdatedCollectionReplica(
            rse_id=self.rse_id,
            scope=self.scope,
            name=dataset_name,
            did_type=constants.DIDType.DATASET).save(session=self.db_session)

    def queue_dataset_update():
        # Store an update request that carries no rse_id.
        models.UpdatedCollectionReplica(
            scope=self.scope,
            name=dataset_name,
            did_type=constants.DIDType.DATASET).save(session=self.db_session)

    def request_for_rse():
        # The single update request filtered by first RSE, scope and name.
        return self.db_session.query(models.UpdatedCollectionReplica).filter_by(  # pylint: disable=no-member
            rse_id=self.rse_id, scope=self.scope, name=dataset_name).one()

    def request_without_rse():
        # The single update request for the dataset whose rse_id is NULL.
        request_model = models.UpdatedCollectionReplica
        return self.db_session.query(request_model).filter(  # pylint: disable=no-member
            request_model.scope == self.scope,
            request_model.name == dataset_name,
            request_model.rse_id.is_(None)).one()

    def apply(request):
        # Hand the request, as a dict, to the function under test.
        update_collection_replica(update_request=request.to_dict(), session=self.db_session)

    def replica_query(**extra):
        # Query over the dataset's collection replicas, optionally narrowed.
        return self.db_session.query(models.CollectionReplica).filter_by(  # pylint: disable=no-member
            scope=self.scope, name=dataset_name, **extra)

    def check(replica, total, available, state):
        # Verify the size counters and the state of one dataset replica.
        assert replica['bytes'] == total * file_size
        assert replica['length'] == total
        assert replica['available_bytes'] == available * file_size
        assert replica['available_replicas_cnt'] == available
        assert replica['state'] == state

    # ------------------------------------------------------------------
    # Fixture: two file replicas on the first RSE attached to one dataset.
    # ------------------------------------------------------------------
    add_replicas(rse_id=self.rse_id, files=files, account=self.account, session=self.db_session)
    add_did(scope=self.scope, name=dataset_name, type=constants.DIDType.DATASET,
            account=self.account, session=self.db_session)
    attach_dids(scope=self.scope, name=dataset_name, dids=files,
                account=self.account, session=self.db_session)
    initial_replica = models.CollectionReplica(
        rse_id=self.rse_id,
        scope=self.scope,
        state=constants.ReplicaState.AVAILABLE,
        name=dataset_name,
        did_type=constants.DIDType.DATASET,
        bytes=len(files) * file_size,
        length=len(files),
        available_replicas_cnt=0)
    initial_replica.save(session=self.db_session)

    # ==================================================================
    # Part 1: update requests WITH an rse_id
    # ==================================================================

    # First update -> the request is consumed and the dataset replica is available.
    queue_rse_update()
    request = request_for_rse()
    apply(request)
    leftover = self.db_session.query(models.UpdatedCollectionReplica).filter_by(  # pylint: disable=no-member
        id=request.id).first()
    assert leftover is None
    check(replica_query().one(),
          total=len(files), available=len(files), state=ReplicaState.AVAILABLE)

    # Delete one file replica -> the dataset replica becomes unavailable.
    delete_replicas(rse_id=self.rse_id, files=[files[0]], session=self.db_session)
    apply(request_for_rse())
    check(replica_query().one(),
          total=len(files), available=len(files) - 1, state=ReplicaState.UNAVAILABLE)

    # Add the file replica back -> the dataset replica is available again.
    add_replicas(rse_id=self.rse_id, files=[files[0]], account=self.account, session=self.db_session)
    attach_dids(scope=self.scope, name=dataset_name, dids=[files[0]],
                account=self.account, session=self.db_session)
    queue_rse_update()
    apply(request_for_rse())
    check(replica_query().one(),
          total=len(files), available=len(files), state=ReplicaState.AVAILABLE)

    # Delete all file replicas -> the dataset replica is removed.
    delete_replicas(rse_id=self.rse_id, files=files, session=self.db_session)
    apply(request_for_rse())
    remaining = replica_query().all()
    assert len(remaining) == 0

    # ==================================================================
    # Part 2: update requests WITHOUT an rse_id
    # Two RSEs with one replica per file each -> four file replicas.
    # ==================================================================
    add_replicas(rse_id=self.rse_id, files=files, account=self.account, session=self.db_session)
    add_replicas(rse_id=self.rse2_id, files=files, account=self.account, session=self.db_session)
    attach_dids(scope=self.scope, name=dataset_name, dids=files,
                account=self.account, session=self.db_session)
    models.CollectionReplica(
        rse_id=self.rse_id,
        scope=self.scope,
        name=dataset_name,
        state=constants.ReplicaState.UNAVAILABLE,
        did_type=constants.DIDType.DATASET,
        bytes=len(files) * file_size,
        length=len(files)).save(session=self.db_session)
    models.CollectionReplica(
        rse_id=self.rse2_id,
        scope=self.scope,
        name=dataset_name,
        state=constants.ReplicaState.UNAVAILABLE,
        did_type=constants.DIDType.DATASET,
        bytes=len(files) * file_size,
        length=len(files)).save(session=self.db_session)

    # First update (request stored without did_type) -> all replicas available.
    models.UpdatedCollectionReplica(
        scope=self.scope, name=dataset_name).save(session=self.db_session)
    first_request = self.db_session.query(models.UpdatedCollectionReplica).filter_by(  # pylint: disable=no-member
        scope=self.scope, name=dataset_name).one()
    apply(first_request)
    for replica in replica_query().all():
        check(replica,
              total=len(files), available=len(files), state=ReplicaState.AVAILABLE)

    # Delete the first file replica on the first RSE -> first RSE unavailable,
    # second RSE still available.
    delete_replicas(rse_id=self.rse_id, files=[files[0]], session=self.db_session)
    queue_dataset_update()
    # delete_replicas also leaves an update request, but one with an rse_id;
    # hence the lookup restricted to rse_id IS NULL.
    apply(request_without_rse())
    check(replica_query(rse_id=self.rse_id).one(),
          total=len(files), available=len(files) - 1, state=ReplicaState.UNAVAILABLE)
    check(replica_query(rse_id=self.rse2_id).one(),
          total=len(files), available=len(files), state=ReplicaState.AVAILABLE)

    # Mark the first file replica on the second RSE as UNAVAILABLE -> both
    # dataset replicas are unavailable.
    file_replica = self.db_session.query(models.RSEFileAssociation).filter_by(  # pylint: disable=no-member
        rse_id=self.rse2_id, scope=self.scope, name=files[0]['name']).one()
    file_replica.state = constants.ReplicaState.UNAVAILABLE
    queue_dataset_update()
    apply(request_without_rse())
    check(replica_query(rse_id=self.rse_id).one(),
          total=len(files), available=len(files) - 1, state=ReplicaState.UNAVAILABLE)
    check(replica_query(rse_id=self.rse2_id).one(),
          total=len(files), available=len(files) - 1, state=ReplicaState.UNAVAILABLE)

    # Delete the first file replica on the second RSE -> the file is no longer
    # part of the dataset, so both (now smaller) dataset replicas are available.
    delete_replicas(rse_id=self.rse2_id, files=[files[0]], session=self.db_session)
    queue_dataset_update()
    apply(request_without_rse())
    check(replica_query(rse_id=self.rse_id).one(),
          total=len(files) - 1, available=len(files) - 1, state=ReplicaState.AVAILABLE)
    check(replica_query(rse_id=self.rse2_id).one(),
          total=len(files) - 1, available=len(files) - 1, state=ReplicaState.AVAILABLE)

    # Re-add the first file on the first RSE -> only the first dataset replica
    # is available.
    add_replicas(rse_id=self.rse_id, files=[files[0]], account=self.account, session=self.db_session)
    attach_dids(scope=self.scope, name=dataset_name, dids=[files[0]],
                account=self.account, session=self.db_session)
    queue_dataset_update()
    apply(request_without_rse())
    check(replica_query(rse_id=self.rse_id).one(),
          total=len(files), available=len(files), state=ReplicaState.AVAILABLE)
    check(replica_query(rse_id=self.rse2_id).one(),
          total=len(files), available=len(files) - 1, state=ReplicaState.UNAVAILABLE)

    # Re-add the first file on the second RSE -> both dataset replicas are
    # available again.
    add_replicas(rse_id=self.rse2_id, files=[files[0]], account=self.account, session=self.db_session)
    queue_dataset_update()
    apply(request_without_rse())
    check(replica_query(rse_id=self.rse_id).one(),
          total=len(files), available=len(files), state=ReplicaState.AVAILABLE)
    check(replica_query(rse_id=self.rse2_id).one(),
          total=len(files), available=len(files), state=ReplicaState.AVAILABLE)
def collection_replica_update(once=False, limit=1000):
    """
    Daemon loop that fetches pending collection replica updates and applies them.

    :param once: When true, the loop is left after a single iteration and the
                 function never sleeps.
    :param limit: Passed through as ``limit`` when fetching a batch; a batch
                  smaller than a truthy ``limit`` triggers a 10 second sleep
                  (unless ``once`` is set).
    """
    logging.info('collection_replica_update: starting')
    logging.info('collection_replica_update: started')

    # Identity sent with every heartbeat call and with the final die() call.
    identity = {
        'executable': 'abacus-collection-replica',
        'hostname': socket.gethostname(),
        'pid': os.getpid(),
        'thread': threading.current_thread(),
    }

    # Initial heartbeat, issued so that all abacus-collection-replica daemons
    # have the correct worker number on the next try.
    live(**identity)

    while not graceful_stop.is_set():
        try:
            heartbeat = live(**identity)
            last_worker = heartbeat['nr_threads'] - 1
            this_worker = heartbeat['assign_thread']

            # Fetch the batch of update requests assigned to this worker.
            query_started = time.time()
            batch = get_cleaned_updated_collection_replicas(
                total_workers=last_worker,
                worker_number=this_worker,
                limit=limit)
            logging.debug('Index query time %f size=%d' % (time.time() - query_started, len(batch)))

            if batch or once:
                for request in batch:
                    if graceful_stop.is_set():
                        break
                    item_started = time.time()
                    update_collection_replica(request)
                    logging.debug(
                        'collection_replica_update[%s/%s]: update of collection replica "%s" took %f'
                        % (this_worker, last_worker, request['id'], time.time() - item_started))

                # A short batch means the backlog is drained: pause before polling again.
                if limit and len(batch) < limit and not once:
                    time.sleep(10)
            else:
                # Empty batch in continuous mode: report it and pause.
                logging.info(
                    'collection_replica_update[%s/%s] did not get any work'
                    % (this_worker, last_worker))
                time.sleep(10)
        except Exception:
            # Top-level daemon boundary: log the traceback and keep looping.
            logging.error(traceback.format_exc())

        if once:
            break

    logging.info('collection_replica_update: graceful stop requested')
    die(**identity)
    logging.info('collection_replica_update: graceful stop done')