def __init__(self, datatypes, dest_rse_expr, max_bytes_hour, max_files_hour, max_bytes_hour_rse, max_files_hour_rse, min_popularity, min_recent_requests, max_replicas):
    """
    Create the collectors, the Redis-backed caches/time series and the
    destination RSE lookup tables, then print the parameters.

    :param datatypes: Comma-separated string; stored as a list split on ','.
    :param dest_rse_expr: RSE expression resolved with parse_expression.
    :param max_bytes_hour: Stored as-is on the instance.
    :param max_files_hour: Stored as-is on the instance.
    :param max_bytes_hour_rse: Stored as-is on the instance.
    :param max_files_hour_rse: Stored as-is on the instance.
    :param min_popularity: Stored as-is on the instance.
    :param min_recent_requests: Stored as-is on the instance.
    :param max_replicas: Stored as-is on the instance.
    """
    def redis_endpoint():
        # Host and port are read from the configuration for every helper.
        return config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port')

    self._fsc = FreeSpaceCollector()
    self._nmc = NetworkMetricsCollector()

    # Caches expire after 86400 s; the time series use a 3600 s window.
    self._added_cache = ExpiringDatasetCache(*redis_endpoint(), timeout=86400)
    self._dc = DatasetCache(*redis_endpoint(), timeout=86400)
    self._added_bytes = RedisTimeSeries(*redis_endpoint(), window=3600, prefix="added_bytes_")
    self._added_files = RedisTimeSeries(*redis_endpoint(), window=3600, prefix="added_files_")

    self._datatypes = datatypes.split(',')
    self._dest_rse_expr = dest_rse_expr

    self._max_bytes_hour = max_bytes_hour
    self._max_files_hour = max_files_hour
    self._max_bytes_hour_rse = max_bytes_hour_rse
    self._max_files_hour_rse = max_files_hour_rse
    self._min_popularity = min_popularity
    self._min_recent_requests = min_recent_requests
    self._max_replicas = max_replicas

    destinations = parse_expression(self._dest_rse_expr)

    # Index the attribute dicts both by RSE name and by their 'site' attribute.
    self._rses = {}
    self._sites = {}
    for entry in destinations:
        rse_name = entry['rse']
        attributes = list_rse_attributes(rse_name)
        attributes['rse'] = rse_name
        self._rses[rse_name] = attributes
        self._sites[attributes['site']] = attributes

    self._dst_penalties = {}
    self._src_penalties = {}

    self._print_params()
def consumer(id, num_thread=1):
    """
    Main loop to consume messages from the Rucio Cache producer.

    Resolves the configured broker aliases, creates one STOMP connection per
    resolved address and, once per second, (re)connects and subscribes every
    connection that is not connected, until GRACEFUL_STOP is set.

    :param id: Passed through to each Consumer listener.
    :param num_thread: Passed through to each Consumer listener.
    :raises Exception: If the broker list cannot be loaded from the configuration.
    """
    logging.info('Rucio Cache consumer starting')

    try:
        brokers_alias = [b.strip() for b in config_get('messaging-cache', 'brokers').split(',')]
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt are not masked.
        raise Exception('Could not load rucio cache brokers from configuration')

    logging.info('resolving rucio cache broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        brokers_resolved.extend(str(tmp_broker) for tmp_broker in dns.resolver.query(broker, 'A'))

    logging.debug('Rucio cache brokers resolved to %s', brokers_resolved)

    # Maps each connection to the listener that is attached on (re)connect.
    conns = {}
    for broker in brokers_resolved:
        conn = stomp.Connection(host_and_ports=[(broker, config_get_int('messaging-cache', 'port'))],
                                use_ssl=True,
                                ssl_key_file=config_get('messaging-cache', 'ssl_key_file'),
                                ssl_cert_file=config_get('messaging-cache', 'ssl_cert_file'),
                                ssl_version=ssl.PROTOCOL_TLSv1)
        conns[conn] = Consumer(conn.transport._Transport__host_and_ports[0],
                               account=config_get('messaging-cache', 'account'),
                               id=id,
                               num_thread=num_thread)

    logging.info('consumer started')

    while not GRACEFUL_STOP.is_set():
        for conn in conns:
            if not conn.is_connected():
                host = conn.transport._Transport__host_and_ports[0][0]
                logging.info('connecting to %s' % host)
                record_counter('daemons.messaging.cache.reconnect.%s' % host.split('.')[0])

                conn.set_listener('rucio-cache-messaging', conns[conn])
                conn.start()
                conn.connect()
                conn.subscribe(destination=config_get('messaging-cache', 'destination'),
                               id='rucio-cache-messaging',
                               ack='auto')

        time.sleep(1)

    logging.info('graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except Exception as error:
            # Best effort on shutdown: keep going, but leave a trace of the failure.
            logging.debug('disconnect failed: %s', error)

    logging.info('graceful stop done')
def consumer(id_, num_thread=1):
    """
    Main loop to consume messages from the Rucio Cache producer.

    Resolves the configured broker aliases to IPv4 addresses, creates one
    STOMP connection per address and, once per second, connects and
    subscribes every connection that is not connected, until GRACEFUL_STOP
    is set.

    :param id_: Passed through to each Consumer listener.
    :param num_thread: Passed through to each Consumer listener.
    :raises Exception: If the broker list cannot be loaded from the configuration.
    """
    logging.info('Rucio Cache consumer starting')

    try:
        brokers_alias = [b.strip() for b in config_get('messaging-cache', 'brokers').split(',')]
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt are not masked.
        raise Exception('Could not load rucio cache brokers from configuration')

    logging.info('resolving rucio cache broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        addrinfos = socket.getaddrinfo(broker, 0, socket.AF_INET, 0, socket.IPPROTO_TCP)
        # The address is the first element of the sockaddr tuple (index 4).
        brokers_resolved.extend(ai[4][0] for ai in addrinfos)

    logging.debug('Rucio cache brokers resolved to %s', brokers_resolved)

    # Maps each connection to the listener that is attached on (re)connect.
    conns = {}
    for broker in brokers_resolved:
        conn = stomp.Connection(host_and_ports=[(broker, config_get_int('messaging-cache', 'port'))],
                                use_ssl=True,
                                ssl_key_file=config_get('messaging-cache', 'ssl_key_file'),
                                ssl_cert_file=config_get('messaging-cache', 'ssl_cert_file'),
                                vhost=config_get('messaging-cache', 'broker_virtual_host', raise_exception=False))
        conns[conn] = Consumer(conn.transport._Transport__host_and_ports[0],
                               account=config_get('messaging-cache', 'account'),
                               id_=id_,
                               num_thread=num_thread)

    logging.info('consumer started')

    while not GRACEFUL_STOP.is_set():
        for conn in conns:
            if not conn.is_connected():
                host = conn.transport._Transport__host_and_ports[0][0]
                logging.info('connecting to %s' % host)
                record_counter('daemons.messaging.cache.reconnect.%s' % host.split('.')[0])

                conn.set_listener('rucio-cache-messaging', conns[conn])
                conn.connect()
                conn.subscribe(destination=config_get('messaging-cache', 'destination'),
                               id='rucio-cache-messaging',
                               ack='auto')

        time.sleep(1)

    logging.info('graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except Exception as error:
            # Best effort on shutdown: keep going, but leave a trace of the failure.
            logging.debug('disconnect failed: %s', error)

    logging.info('graceful stop done')
def _retry_protocol_stat(self, protocol, pfn):
    """
    Stat a file through the given protocol, retrying with a doubling delay.

    With the default of 6 retries the waits between attempts are
    1s, 2s, 4s, 8s, 16s and 32s. When every attempt has failed, one last
    stat is issued and its result (or exception) is handed to the caller.

    :param protocol: The protocol to use to reach this file
    :param pfn: Physical file name of the target for the protocol stat
    :returns: The stats returned by protocol.stat
    """
    max_attempts = config_get_int('client', 'protocol_stat_retries', raise_exception=False, default=6)
    # Errors containing one of these texts are re-raised instead of retried.
    fatal_fragments = ['The requested service is not available at the moment', 'Permission refused']

    for attempt in range(max_attempts):
        try:
            self.logger.debug('stat: pfn=%s' % pfn)
            file_stats = protocol.stat(pfn)

            if int(file_stats['filesize']) == 0:
                raise Exception('Filesize came back as 0. Potential storage race condition, need to retry.')
            return file_stats
        except RSEChecksumUnavailable as error:
            # The stat itself worked, only the checksum is missing: no retry
            raise error
        except Exception as error:
            self.logger.debug('stat: unexpected error=%s' % error)
            message = str(error)
            for fragment in fatal_fragments:
                if fragment in message:
                    raise error
            delay = 2**attempt
            self.logger.debug('stat: unknown edge case, retrying in %ss' % delay)
            time.sleep(delay)

    return protocol.stat(pfn)
def kronos_dataset(thread=0, dataset_queue=None, sleep_time=60):
    """
    Periodically flush the dataset queue until a graceful stop is requested.

    Each cycle renews the heartbeat, calls __update_datasets once more than
    'dataset_wait' seconds (section 'tracer-kronos') have passed since the
    last flush, and sleeps for the remainder of sleep_time. On shutdown the
    heartbeat is removed and the queue is flushed one last time.

    :param thread: Number used only in the start-up/fallback log prefix.
    :param dataset_queue: Queue handed to __update_datasets.
    :param sleep_time: Target duration of one cycle, in seconds.
    """
    logging.info('kronos-dataset[%d/?] starting', thread)

    executable = 'kronos-dataset'
    hostname = socket.gethostname()
    pid = getpid()
    hb_thread = current_thread()

    dataset_wait = config_get_int('tracer-kronos', 'dataset_wait')
    start = datetime.now()
    sanity_check(executable=executable, hostname=hostname)

    # Fallback logger: the shutdown code below needs one even when the stop
    # event is already set and the loop body never runs.
    logger = formatted_logger(logging.log, 'kronos-dataset[%i/?] ' % thread + '%s')

    while not graceful_stop.is_set():
        start_time = time()
        heart_beat = live(executable, hostname, pid, hb_thread)
        prepend_str = 'kronos-dataset[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
        logger = formatted_logger(logging.log, prepend_str + '%s')

        if (datetime.now() - start).seconds > dataset_wait:
            __update_datasets(dataset_queue, logger=logger)
            start = datetime.now()

        tottime = time() - start_time
        if tottime < sleep_time:
            logger(logging.INFO, 'Will sleep for %s seconds' % (sleep_time - tottime))
            sleep(sleep_time - tottime)

    # Remove the heartbeat using the same thread object that live() registered.
    die(executable=executable, hostname=hostname, pid=pid, thread=hb_thread)

    # once again for the backlog
    logger(logging.INFO, 'cleaning dataset backlog before shutdown...')
    __update_datasets(dataset_queue, logger=logger)
def kronos_dataset(once=False, thread=0, dataset_queue=None, sleep_time=60):
    """
    Periodically flush the dataset queue until a graceful stop is requested.

    Every cycle renews the 'kronos-dataset' heartbeat, flushes the queue once
    more than 'dataset_wait' seconds have passed since the previous flush and
    sleeps for what is left of sleep_time. The heartbeat is removed and the
    queue flushed a final time on shutdown.

    :param once: Not used by this function.
    :param thread: Ignored; replaced by the current thread object.
    :param dataset_queue: Queue handed to __update_datasets.
    :param sleep_time: Target duration of one cycle, in seconds.
    """
    logging.info('(kronos_dataset) starting')

    daemon_name = 'kronos-dataset'
    host = socket.gethostname()
    process_id = getpid()
    thread = current_thread()

    dataset_wait = config_get_int('tracer-kronos', 'dataset_wait')
    last_flush = datetime.now()
    sanity_check(executable=daemon_name, hostname=host)

    while not graceful_stop.is_set():
        cycle_started = time()
        live(executable=daemon_name, hostname=host, pid=process_id, thread=thread)

        since_flush = datetime.now() - last_flush
        if since_flush.seconds > dataset_wait:
            __update_datasets(dataset_queue)
            last_flush = datetime.now()

        elapsed = time() - cycle_started
        if elapsed < sleep_time:
            pause = sleep_time - elapsed
            logging.info('(kronos_dataset) Will sleep for %s seconds' % pause)
            sleep(pause)

    die(executable=daemon_name, hostname=host, pid=process_id, thread=thread)

    # flush whatever is still queued before leaving
    logging.info('(kronos_dataset) cleaning dataset backlog before shutdown...')
    __update_datasets(dataset_queue)
def run(total_workers=1, once=False, inputfile=None):
    """
    Starts up the automatix threads.

    Reads the 'automatix' configuration section, starts one automatix thread
    per worker and blocks until every thread has finished.

    :param total_workers: Number of automatix threads to start.
    :param once: Passed through to each automatix thread.
    :param inputfile: Passed through to each automatix thread; defaults to
                      '/opt/rucio/etc/automatix.json' when not given.
    :raises Exception: If the site list cannot be loaded from the configuration.
    """
    try:
        sites = [s.strip() for s in config_get('automatix', 'sites').split(',')]
    except Exception:
        raise Exception('Could not load sites from configuration')
    if not inputfile:
        inputfile = '/opt/rucio/etc/automatix.json'
    try:
        sleep_time = config_get_int('automatix', 'sleep_time')
    except Exception:
        sleep_time = 3600
    try:
        # The account is a name, not a number: it must be read as a string.
        account = config_get('automatix', 'account')
    except Exception:
        account = 'root'
    try:
        dataset_lifetime = config_get_int('automatix', 'dataset_lifetime')
    except Exception:
        dataset_lifetime = None
    try:
        set_metadata = config_get_bool('automatix', 'set_metadata')
    except Exception:
        set_metadata = False

    threads = []
    for worker_number in range(0, total_workers):
        kwargs = {
            'worker_number': worker_number + 1,
            'total_workers': total_workers,
            'once': once,
            'sites': sites,
            'sleep_time': sleep_time,
            'account': account,
            'inputfile': inputfile,
            'set_metadata': set_metadata,
            'dataset_lifetime': dataset_lifetime,
        }
        threads.append(threading.Thread(target=automatix, kwargs=kwargs))

    for thread in threads:
        thread.start()

    # Wait for all workers, not just the first one; this also copes with an
    # empty thread list.
    alive = [thread for thread in threads if thread.is_alive()]
    while alive:
        logging.debug('Still %i active threads' % len(alive))
        for thread in alive:
            thread.join(timeout=3.14)
        alive = [thread for thread in alive if thread.is_alive()]
def consumer(id, total_threads=1):
    """
    Main loop to consume messages from the FTS3 producer.

    Resolves the configured broker aliases, creates one STOMP connection per
    resolved address and, once per second, (re)connects and subscribes every
    connection that is not connected, until graceful_stop is set.

    :param id: Passed through to each Consumer listener.
    :param total_threads: Passed through to each Consumer listener.
    :raises Exception: If the broker list cannot be loaded from the configuration.
    """
    logging.info('consumer starting')

    try:
        brokers_alias = [b.strip() for b in config_get('messaging-fts3', 'brokers').split(',')]
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt are not masked.
        raise Exception('Could not load brokers from configuration')

    logging.info('resolving broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        brokers_resolved.extend(str(tmp_broker) for tmp_broker in dns.resolver.query(broker, 'A'))

    logging.debug('brokers resolved to %s', brokers_resolved)

    conns = []
    for broker in brokers_resolved:
        conns.append(stomp.Connection(host_and_ports=[(broker, config_get_int('messaging-fts3', 'port'))],
                                      use_ssl=True,
                                      ssl_key_file=config_get('messaging-fts3', 'ssl_key_file'),
                                      ssl_cert_file=config_get('messaging-fts3', 'ssl_cert_file'),
                                      ssl_version=ssl.PROTOCOL_TLSv1))

    logging.info('consumer started')

    while not graceful_stop.is_set():
        for conn in conns:
            if not conn.is_connected():
                host_and_port = conn.transport._Transport__host_and_ports[0]
                logging.info('connecting to %s' % host_and_port[0])
                record_counter('daemons.messaging.fts3.reconnect.%s' % host_and_port[0].split('.')[0])

                # A fresh listener is attached on every (re)connect.
                conn.set_listener('rucio-messaging-fts3',
                                  Consumer(broker=host_and_port, id=id, total_threads=total_threads))
                conn.start()
                conn.connect()
                conn.subscribe(destination=config_get('messaging-fts3', 'destination'),
                               id='rucio-messaging-fts3',
                               ack='auto')

        time.sleep(1)

    logging.info('graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except Exception as error:
            # Best effort on shutdown: keep going, but leave a trace of the failure.
            logging.debug('disconnect failed: %s', error)

    logging.info('graceful stop done')
def __init__(self, delete_keys=False):
    """
    Set up the job counters, the Redis time series and the request settings,
    then load the cache.

    :param delete_keys: When true, the keys of the time series are deleted
                        before the cache is reloaded.
    """
    self._avg_jobs = {}
    self._cur_jobs = {}
    self._max_jobs = {}

    redis_host = config_get('c3po', 'redis_host')
    redis_port = config_get_int('c3po', 'redis_port')
    window = config_get_int('c3po-workload', 'window')
    self._tms = RedisTimeSeries(redis_host, redis_port, window, 'jobs_')

    self._request_headers = {"Accept": "application/json", "Content-Type": "application/json"}
    self._request_url = config_get('c3po-workload', 'panda_url')

    if delete_keys:
        self._tms.delete_keys()

    self.reload_cache()
def kronos_dataset(once=False, process=0, total_processes=1, thread=0, total_threads=1, dataset_queue=None):
    """
    Flush the dataset queue at intervals until a graceful stop is requested.

    The queue is flushed whenever more than 'dataset_wait' seconds
    (section 'tracer-kronos') have passed since the previous flush; the loop
    checks every 10 seconds. A final flush is done on shutdown.

    :param dataset_queue: Queue handed to __update_datasets.

    The remaining parameters are not used by this function.
    """
    dataset_wait = config_get_int('tracer-kronos', 'dataset_wait')
    last_flush = datetime.now()

    while True:
        if graceful_stop.is_set():
            break
        since_flush = datetime.now() - last_flush
        if since_flush.seconds > dataset_wait:
            __update_datasets(dataset_queue)
            last_flush = datetime.now()
        sleep(10)

    # flush whatever is still queued before leaving
    logging.info('cleaning dataset backlog before shutdown...')
    __update_datasets(dataset_queue)
def __init__(self):
    """
    Create the free space collector and dataset cache, collect the names of
    all RSEs matching "tier=2&type=DATADISK" and set up the penalties.
    """
    self._fsc = FreeSpaceCollector()

    redis_host = config_get('c3po', 'redis_host')
    redis_port = config_get_int('c3po', 'redis_port')
    self._dc = DatasetCache(redis_host, redis_port, timeout=86400)

    matching_rses = parse_expression("tier=2&type=DATADISK")
    self._rses = [entry['rse'] for entry in matching_rses]

    self.__setup_penalties()
def bulk_submit_xfer(submitjob, recursive=False, logger=logging.log):
    """
    Submit a list of file transfers to Globus as one transfer task.

    :param submitjob: Non-empty list of dicts. Each entry provides 'sources',
                      'destinations' (first element of each is used) and
                      'metadata' with 'filesize'. The metadata of the first
                      entry supplies 'source_globus_endpoint_id' and
                      'dest_globus_endpoint_id' for the whole task.
    :param recursive: Currently unused; every item is added with recursive=False.
    :param logger: Callable invoked as logger(level, message).
    :returns: The 'task_id' entry of the submit_transfer result.
    """
    cfg = load_config(logger=logger)
    client_id = cfg['globus']['apps'][GLOBUS_AUTH_APP]['client_id']
    auth_client = NativeAppAuthClient(client_id)
    refresh_token = cfg['globus']['apps'][GLOBUS_AUTH_APP]['refresh_token']
    source_endpoint_id = submitjob[0].get('metadata').get('source_globus_endpoint_id')
    destination_endpoint_id = submitjob[0].get('metadata').get('dest_globus_endpoint_id')
    authorizer = RefreshTokenAuthorizer(refresh_token=refresh_token, auth_client=auth_client)
    tc = TransferClient(authorizer=authorizer)

    # make job_label for task a timestamp; '%S' (seconds) is the portable
    # directive, lower-case '%s' is platform-specific and undocumented
    now = datetime.datetime.now()
    job_label = now.strftime('%Y%m%d%H%M%S')

    # retrieve globus_task_deadline value to enforce time window to complete transfers
    # default is 2880 minutes or 48 hours
    globus_task_deadline = config_get_int('conveyor', 'globus_task_deadline', False, 2880)
    deadline = now + datetime.timedelta(minutes=globus_task_deadline)

    # from Globus... sync_level=checksum means that before files are transferred, Globus will compute checksums on the source
    # and destination files, and only transfer files that have different checksums are transferred. verify_checksum=True means
    # that after a file is transferred, Globus will compute checksums on the source and destination files to verify that the
    # file was transferred correctly.  If the checksums do not match, it will redo the transfer of that file.
    tdata = TransferData(tc, source_endpoint_id, destination_endpoint_id, label=job_label, sync_level="checksum", deadline=str(deadline))

    for transfer in submitjob:
        source_path = transfer.get('sources')[0]
        dest_path = transfer.get('destinations')[0]
        filesize = transfer['metadata']['filesize']
        # TODO: support passing a recursive parameter to Globus
        tdata.add_item(source_path, dest_path, recursive=False)
        record_counter('daemons.conveyor.transfer_submitter.globus.transfers.submit.filesize', filesize)

    transfer_result = tc.submit_transfer(tdata)
    logger(logging.INFO, "transfer_result: %s" % transfer_result)

    return transfer_result["task_id"]
def run(total_workers=1, once=False, inputfile=None):
    """
    Starts up the automatix threads.

    Reads the 'automatix' configuration section, starts one automatix thread
    per worker and blocks until every thread has finished.

    :param total_workers: Number of automatix threads to start.
    :param once: Passed through to each automatix thread.
    :param inputfile: Passed through to each automatix thread; defaults to
                      '/opt/rucio/etc/automatix.json' when not given.
    :raises Exception: If the site list cannot be loaded from the configuration.
    """
    try:
        sites = [s.strip() for s in config_get('automatix', 'sites').split(',')]
    except Exception:
        raise Exception('Could not load sites from configuration')
    if not inputfile:
        inputfile = '/opt/rucio/etc/automatix.json'
    try:
        sleep_time = config_get_int('automatix', 'sleep_time')
    except Exception:
        sleep_time = 3600
    try:
        # The account is a name, not a number: it must be read as a string.
        account = config_get('automatix', 'account')
    except Exception:
        account = 'root'
    try:
        dataset_lifetime = config_get_int('automatix', 'dataset_lifetime')
    except Exception:
        dataset_lifetime = None

    threads = []
    for worker_number in range(0, total_workers):
        kwargs = {'worker_number': worker_number + 1,
                  'total_workers': total_workers,
                  'once': once,
                  'sites': sites,
                  'sleep_time': sleep_time,
                  'account': account,
                  'inputfile': inputfile,
                  'dataset_lifetime': dataset_lifetime}
        threads.append(threading.Thread(target=automatix, kwargs=kwargs))

    for t in threads:
        t.start()

    # Wait for all workers, not just the first one; this also copes with an
    # empty thread list.
    alive = [t for t in threads if t.is_alive()]
    while alive:
        logging.debug('Still %i active threads' % len(alive))
        for t in alive:
            t.join(timeout=3.14)
        alive = [t for t in alive if t.is_alive()]
def _retry_protocol_stat(self, protocol, pfn):
    """
    Try to stat a file; on failure try again 1s, 2s, 4s, 8s, 16s, 32s later
    (with the default of 6 retries). If all retries fail, one final stat is
    issued and its result or exception is handed to the caller.

    :param protocol: The protocol to use to reach this file
    :param pfn: Physical file name of the target for the protocol stat
    :returns: The stats returned by protocol.stat
    """
    retries = config_get_int('client', 'protocol_stat_retries', raise_exception=False, default=6)
    for attempt in range(retries):
        try:
            return protocol.stat(pfn)
        except RSEChecksumUnavailable:
            # The stat succeeded here, but the checksum failed: do not retry
            raise
        except Exception:
            # Any other failure is retried after an exponentially growing wait
            time.sleep(2**attempt)
    return protocol.stat(pfn)
def __geoip_db():
    """
    Return a geoip2 database reader, downloading the database file first when
    it is missing or older than the configured expiry delay.

    The delay is read in days from option 'geoip_expire_delay' of section
    'core' (default 30); a zero delay disables the age check.
    """
    db_path = Path(f'/tmp/{GEOIP_DB_EDITION}.mmdb')
    expire_days = config_get_int('core', 'geoip_expire_delay', raise_exception=False, default=30)
    max_age = timedelta(days=expire_days)

    def needs_download():
        if not db_path.is_file():
            print('%s does not exist. Downloading it.' % db_path)
            return True
        if not max_age:
            # A zero timedelta is falsy: no expiry check in that case.
            return False
        modified = datetime.fromtimestamp(db_path.stat().st_mtime)
        if modified < datetime.now() - max_age:
            print('%s is too old. Re-downloading it.' % db_path)
            return True
        return False

    if needs_download():
        __download_geoip_db(destination=db_path)

    return geoip2.database.Reader(str(db_path))
def kronos_dataset(once=False, thread=0, dataset_queue=None):
    """
    Flush the dataset queue at intervals until a graceful stop is requested.

    Every 10 seconds the 'kronos-dataset' heartbeat is renewed and, once more
    than 'dataset_wait' seconds (section 'tracer-kronos') have passed since
    the previous flush, the queue is flushed. On shutdown the heartbeat is
    removed and the queue is flushed one last time.

    :param once: Not used by this function.
    :param thread: Ignored; replaced by the current thread object.
    :param dataset_queue: Queue handed to __update_datasets.
    """
    logging.info('(kronos_dataset) starting')

    executable = 'kronos-dataset'
    hostname = gethostname()
    pid = getpid()
    thread = current_thread()

    dataset_wait = config_get_int('tracer-kronos', 'dataset_wait')
    start = datetime.now()
    while not graceful_stop.is_set():
        live(executable=executable, hostname=hostname, pid=pid, thread=thread)
        if (datetime.now() - start).seconds > dataset_wait:
            __update_datasets(dataset_queue)
            start = datetime.now()
        sleep(10)

    # The heartbeat must be removed under the same executable name it was
    # registered with ('kronos-dataset', not 'rucio-dataset').
    die(executable=executable, hostname=hostname, pid=pid, thread=thread)

    # once again for the backlog
    logging.info('(kronos_dataset) cleaning dataset backlog before shutdown...')
    __update_datasets(dataset_queue)
# Settings of the non-grid trace logger, read from the 'nongrid-trace'
# configuration section; each option falls back to the default given here.
# The configured level name is upper-cased and looked up on the logging module.
CONFIG_TRACE_LOGLEVEL = getattr( logging, config_get('nongrid-trace', 'loglevel', raise_exception=False, default='DEBUG').upper())
CONFIG_TRACE_LOGFORMAT = config_get('nongrid-trace', 'logformat', raise_exception=False, default='%(message)s')
CONFIG_TRACE_TRACEDIR = config_get('nongrid-trace', 'tracedir', raise_exception=False, default='/var/log/rucio')
CONFIG_TRACE_MAXBYTES = config_get_int('nongrid-trace', 'maxbytes', raise_exception=False, default=1000000000)
CONFIG_TRACE_BACKUPCOUNT = config_get_int('nongrid-trace', 'backupCount', raise_exception=False, default=10)

# reset root logger handlers. Otherwise everything from ROTATING_LOGGER will also end up in the apache logs.
logging.getLogger().handlers = []

# NOTE(review): CONFIG_COMMON_LOGLEVEL is not defined in this span; presumably
# it is set earlier in the module - confirm.
LOGGER = logging.getLogger('nongrid_trace')
LOGGER.setLevel(CONFIG_COMMON_LOGLEVEL)

# Separate logger for the trace buffer, using the trace-specific level.
ROTATING_LOGGER = logging.getLogger('nongrid_trace_buffer')
ROTATING_LOGGER.setLevel(CONFIG_TRACE_LOGLEVEL)
def consumer(id, num_thread=1):
    """
    Main loop to consume messages from the Rucio Cache producer.

    Resolves the configured broker aliases, creates one STOMP connection per
    resolved address and, once per second, (re)connects and subscribes every
    connection that is not connected, until graceful_stop is set. The
    subscription carries a selector on the configured 'voname'.

    :param id: Passed through to each Consumer listener.
    :param num_thread: Passed through to each Consumer listener.
    :raises Exception: If the broker list cannot be loaded from the configuration.
    """
    logging.info('Rucio Cache consumer starting')

    try:
        brokers_alias = [b.strip() for b in config_get('messaging-cache', 'brokers').split(',')]
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt are not masked.
        raise Exception('Could not load rucio cache brokers from configuration')

    logging.info('resolving rucio cache broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        brokers_resolved.extend(str(tmp_broker) for tmp_broker in dns.resolver.query(broker, 'A'))

    logging.debug('Rucio cache brokers resolved to %s', brokers_resolved)

    # Maps each connection to the listener that is attached on (re)connect.
    conns = {}
    for broker in brokers_resolved:
        conn = stomp.Connection(host_and_ports=[(broker, config_get_int('messaging-cache', 'port'))],
                                use_ssl=True,
                                ssl_key_file=config_get('messaging-cache', 'ssl_key_file'),
                                ssl_cert_file=config_get('messaging-cache', 'ssl_cert_file'),
                                ssl_version=ssl.PROTOCOL_TLSv1)
        conns[conn] = Consumer(conn.transport._Transport__host_and_ports[0],
                               account=config_get('messaging-cache', 'account'),
                               id=id,
                               num_thread=num_thread)

    logging.info('consumer started')

    while not graceful_stop.is_set():
        for conn in conns:
            if not conn.is_connected():
                host = conn.transport._Transport__host_and_ports[0][0]
                logging.info('connecting to %s' % host)
                record_counter('daemons.messaging.cache.reconnect.%s' % host.split('.')[0])

                conn.set_listener('rucio-cache-messaging', conns[conn])
                conn.start()
                conn.connect()
                conn.subscribe(destination=config_get('messaging-cache', 'destination'),
                               id='rucio-cache-messaging',
                               ack='auto',
                               headers={'selector': 'vo = \'%s\'' % config_get('messaging-cache', 'voname')})

        time.sleep(1)

    logging.info('graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except Exception as error:
            # Best effort on shutdown: keep going, but leave a trace of the failure.
            logging.debug('disconnect failed: %s', error)

    logging.info('graceful stop done')
def kronos_file(thread=0, dataset_queue=None, sleep_time=60):
    """
    Main loop to consume tracer reports.

    Reads the 'tracer-kronos' configuration, then, until graceful_stop is
    set, renews the heartbeat, obtains the broker connections and
    connects/subscribes every connection that is not connected, sleeping for
    the remainder of sleep_time in each cycle. On shutdown all connections
    are disconnected and the heartbeat is removed.

    :param thread: Number used only in the start-up/fallback log prefix.
    :param dataset_queue: Queue handed to each AMQConsumer listener.
    :param sleep_time: Target duration of one cycle, in seconds; also passed
                       as timeout to __get_broker_conns.
    """
    logging.info('kronos_file[%i/?] starting', thread)

    executable = 'kronos-file'
    hostname = socket.gethostname()
    pid = getpid()
    hb_thread = current_thread()

    chunksize = config_get_int('tracer-kronos', 'chunksize')
    prefetch_size = config_get_int('tracer-kronos', 'prefetch_size')
    subscription_id = config_get('tracer-kronos', 'subscription_id')

    # Comma-separated regular expressions; fall back to no patterns on any problem.
    try:
        pattern = str(get(section='kronos', option='bad_files_patterns', session=None))
        bad_files_patterns = [re.compile(pat.strip()) for pat in pattern.split(",")]
    except ConfigNotFound:
        bad_files_patterns = []
    except Exception as error:
        logging.log(logging.ERROR, 'kronos_file[%i/?] Failed to get bad_file_patterns %s', thread, str(error))
        bad_files_patterns = []

    use_ssl = True
    try:
        use_ssl = config_get_bool('tracer-kronos', 'use_ssl')
    except Exception:
        # Option is optional: SSL stays enabled when it cannot be read.
        pass

    # Credentials are only needed (and only read) for non-SSL connections.
    username = password = None
    if not use_ssl:
        username = config_get('tracer-kronos', 'username')
        password = config_get('tracer-kronos', 'password')

    excluded_usrdns = set(config_get('tracer-kronos', 'excluded_usrdns').split(','))
    vhost = config_get('tracer-kronos', 'broker_virtual_host', raise_exception=False)

    brokers_alias = [b.strip() for b in config_get('tracer-kronos', 'brokers').split(',')]
    port = config_get_int('tracer-kronos', 'port')
    reconnect_attempts = config_get_int('tracer-kronos', 'reconnect_attempts')
    ssl_key_file = config_get('tracer-kronos', 'ssl_key_file', raise_exception=False)
    ssl_cert_file = config_get('tracer-kronos', 'ssl_cert_file', raise_exception=False)

    sanity_check(executable=executable, hostname=hostname)

    # Defaults for the shutdown code below, which must also work when the
    # stop event is already set and the loop body never runs.
    conns = []
    logger = formatted_logger(logging.log, 'kronos-file[%i/?] ' % thread + '%s')

    while not graceful_stop.is_set():
        start_time = time()
        heart_beat = live(executable, hostname, pid, hb_thread)
        prepend_str = 'kronos-file[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
        logger = formatted_logger(logging.log, prepend_str + '%s')
        conns = __get_broker_conns(brokers=brokers_alias,
                                   port=port,
                                   use_ssl=use_ssl,
                                   vhost=vhost,
                                   reconnect_attempts=reconnect_attempts,
                                   ssl_key_file=ssl_key_file,
                                   ssl_cert_file=ssl_cert_file,
                                   timeout=sleep_time,
                                   logger=logger)
        for conn in conns:
            if not conn.is_connected():
                host_and_port = conn.transport._Transport__host_and_ports[0]
                logger(logging.INFO, 'connecting to %s' % str(host_and_port))
                record_counter('daemons.tracer.kronos.reconnect.%s' % host_and_port[0])
                conn.set_listener('rucio-tracer-kronos', AMQConsumer(broker=host_and_port,
                                                                     conn=conn,
                                                                     queue=config_get('tracer-kronos', 'queue'),
                                                                     chunksize=chunksize,
                                                                     subscription_id=subscription_id,
                                                                     excluded_usrdns=excluded_usrdns,
                                                                     dataset_queue=dataset_queue,
                                                                     bad_files_patterns=bad_files_patterns,
                                                                     logger=logger))
                if not use_ssl:
                    conn.connect(username, password)
                else:
                    conn.connect()
                conn.subscribe(destination=config_get('tracer-kronos', 'queue'),
                               ack='client-individual',
                               id=subscription_id,
                               headers={'activemq.prefetchSize': prefetch_size})

        tottime = time() - start_time
        if tottime < sleep_time:
            logger(logging.INFO, 'Will sleep for %s seconds' % (sleep_time - tottime))
            sleep(sleep_time - tottime)

    logger(logging.INFO, 'graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except Exception:
            # Best effort on shutdown: a failing disconnect must not block the rest.
            pass

    # Remove the heartbeat using the same thread object that live() registered.
    die(executable=executable, hostname=hostname, pid=pid, thread=hb_thread)
    logger(logging.INFO, 'graceful stop done')
def request_transfer(once=False, src=None, dst=None):
    """
    Main loop to request a new transfer.

    Registers two temporary RSEs (one for src, one for dst), then repeatedly
    creates a mock dataset, uploads the configured test file to the source
    RSE, registers its replica, attaches it to the dataset and adds a rule
    towards the destination RSE.

    :param once: When true, return after the first iteration.
    :param src: Source URL of the form '<token-or-scheme>://<host>:<port>/<prefix>';
                anything not starting with 'https://' is treated as SRM.
    :param dst: Destination URL, same form as src.
    """
    logging.info('request: starting')

    site_a = 'RSE%s' % generate_uuid().upper()
    site_b = 'RSE%s' % generate_uuid().upper()

    scheme = 'https'
    impl = 'rucio.rse.protocols.webdav.Default'
    if not src.startswith('https://'):
        scheme = 'srm'
        impl = 'rucio.rse.protocols.srm.Default'
        srctoken = src.split(':')[0]
        dsttoken = dst.split(':')[0]
    else:
        # Tokens are only used for SRM; split both URLs anyway so malformed
        # input fails before any RSE is registered, as before.
        srctoken = src.split(':')[0]
        dsttoken = dst.split(':')[0]

    def register_endpoint(site, url, space_token):
        # Add the RSE and one read/write/delete protocol derived from the URL.
        proto = {
            'impl': impl,
            'scheme': scheme,
            'domains': {
                'lan': {'read': 1, 'write': 1, 'delete': 1},
                'wan': {'read': 1, 'write': 1, 'delete': 1}}}
        rse.add_rse(site)
        url_parts = url.split(':')
        proto['hostname'] = url_parts[1][2:]  # drop the leading '//'
        port_and_path = url_parts[2].split('/')
        proto['port'] = port_and_path[0]
        proto['prefix'] = '/'.join([''] + port_and_path[1:])
        if scheme == 'srm':
            proto['extended_attributes'] = {'space_token': space_token,
                                            'web_service_path': ''}
        rse.add_protocol(site, proto)

    register_endpoint(site_a, src, srctoken)
    register_endpoint(site_b, dst, dsttoken)

    si = rsemanager.get_rse_info(site_a)

    session = get_session()

    logging.info('request: started')

    while not graceful_stop.is_set():

        try:
            tmp_name = generate_uuid()
            dataset_name = 'dataset-%s' % tmp_name
            file_name = 'file-%s' % tmp_name

            # add a new dataset
            did.add_did(scope='mock', name=dataset_name,
                        type=DIDType.DATASET, account='root', session=session)

            # construct PFN
            pfn = rsemanager.lfns2pfns(si, lfns=[{'scope': 'mock', 'name': file_name}])['mock:%s' % file_name]

            # create the directories if needed
            p = rsemanager.create_protocol(si, operation='write', scheme=scheme)
            p.connect()
            try:
                p.mkdir(pfn)
            except Exception as error:
                # Best effort: the directory may already exist.
                logging.debug('mkdir failed for %s: %s', pfn, error)

            # upload the test file
            try:
                fp = os.path.dirname(config_get('injector', 'file'))
                fn = os.path.basename(config_get('injector', 'file'))
                p.put(fn, pfn, source_dir=fp)
            except Exception:
                logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                did.delete_dids([{'scope': 'mock', 'name': dataset_name}], account='root', session=session)
                break

            # The size is used as an integer for both the replica and the attachment.
            file_bytes = config_get_int('injector', 'bytes')

            # add the replica
            replica.add_replica(rse=site_a, scope='mock', name=file_name,
                                bytes=file_bytes,
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account='root', session=session)

            # to the dataset
            did.attach_dids(scope='mock', name=dataset_name,
                            dids=[{'scope': 'mock',
                                   'name': file_name,
                                   'bytes': file_bytes}],
                            account='root', session=session)

            # add rule for the dataset
            ts = time.time()

            rule.add_rule(dids=[{'scope': 'mock', 'name': dataset_name}],
                          account='root',
                          copies=1,
                          rse_expression=site_b,
                          grouping='ALL',
                          weight=None,
                          lifetime=None,
                          locked=False,
                          subscription_id=None,
                          activity='mock-injector',
                          session=session)

            logging.info('added rule for %s for DID mock:%s' % (site_b, tmp_name))
            record_timer('daemons.mock.conveyorinjector.add_rule', (time.time() - ts) * 1000)

            record_counter('daemons.mock.conveyorinjector.request_transfer')

            session.commit()
        except Exception:
            session.rollback()
            logging.critical(traceback.format_exc())

        if once:
            return

    logging.info('request: graceful stop requested')

    logging.info('request: graceful stop done')
def submitter(once=False, rses=None, partition_wait_time=10, bulk=100, group_bulk=1, group_policy='rule', source_strategy=None, activities=None, sleep_time=600, max_sources=4, archive_timeout_override=None, filter_transfertool=FILTER_TRANSFERTOOL, transfertool=TRANSFER_TOOL, transfertype=TRANSFER_TYPE, ignore_availability=False):
    """
    Main loop to submit a new transfer primitive to a transfertool.

    Reads the optional 'conveyor' settings (falling back to defaults when an
    option is missing), builds the per-transfertool keyword arguments and
    hands a pre-configured run_once to run_daemon.
    """
    def conveyor_option(option, default, reader=config_get, convert=None):
        # Read one option of section 'conveyor'; a missing option yields the default.
        try:
            value = reader('conveyor', option)
            if convert is not None:
                value = convert(value)
            return value
        except NoOptionError:
            return default

    partition_hash_var = conveyor_option('partition_hash_var', None)
    scheme = conveyor_option('scheme', None)
    failover_scheme = conveyor_option('failover_scheme', None)
    timeout = conveyor_option('submit_timeout', None, convert=float)
    bring_online = conveyor_option('bring_online', 43200, reader=config_get_int)

    # 'max_time_in_queue' is a comma-separated list of '<activity>:<hours>' pairs.
    try:
        raw_pairs = (conf.split(":") for conf in config_get('conveyor', 'max_time_in_queue').split(","))
        max_time_in_queue = dict((act.strip(), int(timelife.strip())) for act, timelife in raw_pairs)
    except NoOptionError:
        max_time_in_queue = {}
    if 'default' not in max_time_in_queue:
        max_time_in_queue['default'] = 168
    logging.debug("Maximum time in queue for different activities: %s", max_time_in_queue)

    logger_prefix = "conveyor-submitter"
    executable = logger_prefix
    if activities:
        activities.sort()
        executable = executable + '--activities ' + str(activities)
    if filter_transfertool:
        executable = executable + ' --filter-transfertool ' + filter_transfertool

    rse_ids = [rse['id'] for rse in rses] if rses else None

    transfertools = transfertool.split(',')

    fts3_kwargs = {
        'group_policy': group_policy,
        'group_bulk': group_bulk,
        'source_strategy': source_strategy,
        'max_time_in_queue': max_time_in_queue,
        'bring_online': bring_online,
        'default_lifetime': 172800,
        'archive_timeout_override': archive_timeout_override,
    }
    globus_kwargs = {
        'group_policy': transfertype,
        'group_bulk': group_bulk,
    }
    transfertool_kwargs = {
        FTS3Transfertool: fts3_kwargs,
        GlobusTransferTool: globus_kwargs,
    }

    configured_run_once = functools.partial(
        run_once,
        bulk=bulk,
        group_bulk=group_bulk,
        filter_transfertool=filter_transfertool,
        transfertools=transfertools,
        ignore_availability=ignore_availability,
        scheme=scheme,
        failover_scheme=failover_scheme,
        partition_hash_var=partition_hash_var,
        rse_ids=rse_ids,
        timeout=timeout,
        transfertool_kwargs=transfertool_kwargs,
    )

    run_daemon(
        once=once,
        graceful_stop=graceful_stop,
        executable=executable,
        logger_prefix=logger_prefix,
        partition_wait_time=partition_wait_time,
        sleep_time=sleep_time,
        run_once_fnc=configured_run_once,
        activities=activities,
    )
def kronos_file(once=False, thread=0, brokers_resolved=None, dataset_queue=None):
    """
    Main loop to consume tracer reports.

    Creates one STOMP connection per resolved broker and, until ``graceful_stop`` is set,
    (re)connects every connection that is down, attaching an ``AMQConsumer`` listener and
    subscribing to the configured queue. A heartbeat is sent on every pass.

    :param once: Accepted for interface compatibility; not used by this function.
    :param thread: Accepted for interface compatibility; the heartbeat uses the current thread object.
    :param brokers_resolved: Iterable of broker addresses; ``None`` is treated as no brokers.
    :param dataset_queue: Passed through to every ``AMQConsumer``.
    """
    logging.info('tracer consumer starting')

    hostname = gethostname()
    pid = getpid()
    # Kept in its own name instead of overwriting the ``thread`` parameter.
    hb_thread = current_thread()

    chunksize = config_get_int('tracer-kronos', 'chunksize')
    prefetch_size = config_get_int('tracer-kronos', 'prefetch_size')
    subscription_id = config_get('tracer-kronos', 'subscription_id')

    # SSL is the default when the option cannot be read.
    try:
        use_ssl = config_get_bool('tracer-kronos', 'use_ssl')
    except Exception:
        use_ssl = True

    username = password = None
    if not use_ssl:
        username = config_get('tracer-kronos', 'username')
        password = config_get('tracer-kronos', 'password')

    excluded_usrdns = set(config_get('tracer-kronos', 'excluded_usrdns').split(','))

    # Connection settings are identical for every broker, so read them once.
    port = config_get_int('tracer-kronos', 'port')
    queue = config_get('tracer-kronos', 'queue')
    conn_options = {
        'use_ssl': use_ssl,
        'reconnect_attempts_max': config_get_int('tracer-kronos', 'reconnect_attempts'),
    }
    if use_ssl:
        conn_options['ssl_key_file'] = config_get('tracer-kronos', 'ssl_key_file')
        conn_options['ssl_cert_file'] = config_get('tracer-kronos', 'ssl_cert_file')
        conn_options['ssl_version'] = PROTOCOL_TLSv1

    conns = [Connection(host_and_ports=[(broker, port)], **conn_options)
             for broker in (brokers_resolved or [])]

    logging.info('(kronos_file) tracer consumer started')

    sanity_check(executable='kronos-file', hostname=hostname)
    while not graceful_stop.is_set():
        live(executable='kronos-file', hostname=hostname, pid=pid, thread=hb_thread)
        for conn in conns:
            if conn.is_connected():
                continue

            broker_addr = conn.transport._Transport__host_and_ports[0]
            logging.info('(kronos_file) connecting to %s' % broker_addr[0])
            record_counter('daemons.tracer.kronos.reconnect.%s' % broker_addr[0].split('.')[0])
            conn.set_listener('rucio-tracer-kronos',
                              AMQConsumer(broker=broker_addr,
                                          conn=conn,
                                          queue=queue,
                                          chunksize=chunksize,
                                          subscription_id=subscription_id,
                                          excluded_usrdns=excluded_usrdns,
                                          dataset_queue=dataset_queue))
            conn.start()
            if use_ssl:
                conn.connect()
            else:
                conn.connect(username, password)
            conn.subscribe(destination=queue,
                           ack='client-individual',
                           id=subscription_id,
                           headers={'activemq.prefetchSize': prefetch_size})
        sleep(1)

    logging.info('(kronos_file) graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except Exception:
            # Best effort: a broker that cannot be disconnected must not block shutdown.
            pass

    die(executable='kronos-file', hostname=hostname, pid=pid, thread=hb_thread)
    logging.info('(kronos_file) graceful stop done')
def kronos_file(once=False, process=0, total_processes=1, thread=0, total_threads=1, brokers_resolved=None, dataset_queue=None):
    """
    Main loop to consume tracer reports.

    Creates one STOMP connection per resolved broker and, until ``graceful_stop`` is set,
    (re)connects every connection that is down, attaching an ``AMQConsumer`` listener and
    subscribing to the configured queue.

    :param once: Accepted for interface compatibility; not used by this function.
    :param process: Accepted for interface compatibility; not used by this function.
    :param total_processes: Accepted for interface compatibility; not used by this function.
    :param thread: Accepted for interface compatibility; not used by this function.
    :param total_threads: Accepted for interface compatibility; not used by this function.
    :param brokers_resolved: Iterable of broker addresses; ``None`` is treated as no brokers.
    :param dataset_queue: Passed through to every ``AMQConsumer``.
    """
    logging.info('tracer consumer starting')

    chunksize = config_get_int('tracer-kronos', 'chunksize')
    prefetch_size = config_get_int('tracer-kronos', 'prefetch_size')
    subscription_id = config_get('tracer-kronos', 'subscription_id')

    # SSL is the default when the option cannot be read.
    try:
        use_ssl = config_get_bool('tracer-kronos', 'use_ssl')
    except Exception:
        use_ssl = True

    username = password = None
    if not use_ssl:
        username = config_get('tracer-kronos', 'username')
        password = config_get('tracer-kronos', 'password')

    excluded_usrdns = set(config_get('tracer-kronos', 'excluded_usrdns').split(','))

    # Connection settings are identical for every broker, so read them once.
    port = config_get_int('tracer-kronos', 'port')
    queue = config_get('tracer-kronos', 'queue')
    conn_options = {
        'use_ssl': use_ssl,
        'reconnect_attempts_max': config_get_int('tracer-kronos', 'reconnect_attempts'),
    }
    if use_ssl:
        conn_options['ssl_key_file'] = config_get('tracer-kronos', 'ssl_key_file')
        conn_options['ssl_cert_file'] = config_get('tracer-kronos', 'ssl_cert_file')
        conn_options['ssl_version'] = PROTOCOL_TLSv1

    conns = [Connection(host_and_ports=[(broker, port)], **conn_options)
             for broker in (brokers_resolved or [])]

    logging.info('(kronos_file) tracer consumer started')

    while not graceful_stop.is_set():
        for conn in conns:
            if conn.is_connected():
                continue

            broker_addr = conn.transport._Transport__host_and_ports[0]
            logging.info('(kronos_file) connecting to %s' % broker_addr[0])
            record_counter('daemons.tracer.kronos.reconnect.%s' % broker_addr[0].split('.')[0])
            conn.set_listener('rucio-tracer-kronos',
                              AMQConsumer(broker=broker_addr,
                                          conn=conn,
                                          chunksize=chunksize,
                                          subscription_id=subscription_id,
                                          excluded_usrdns=excluded_usrdns,
                                          dataset_queue=dataset_queue))
            conn.start()
            if use_ssl:
                conn.connect()
            else:
                conn.connect(username, password)
            conn.subscribe(destination=queue,
                           ack='client-individual',
                           id=subscription_id,
                           headers={'activemq.prefetchSize': prefetch_size})
        sleep(1)

    logging.info('(kronos_file) graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except Exception:
            # Best effort: a broker that cannot be disconnected must not block shutdown.
            pass

    logging.info('(kronos_file) graceful stop done')
# Trace logger: raw messages only, written to a size-rotated file in the configured trace directory.
logger.setLevel(logging.INFO)
handler = logging.handlers.RotatingFileHandler(
    filename='%s/trace' % config_get('trace', 'tracedir'),
    maxBytes=1000000000,
    backupCount=10,
)
logFormatter = logging.Formatter('%(message)s')
handler.setFormatter(logFormatter)
handler.suffix = "%Y-%m-%d"
logger.addHandler(handler)

# Broker aliases come from the configuration as a comma-separated list.
brokers_alias = []
brokers_resolved = []
try:
    brokers_alias = [alias.strip() for alias in config_get('trace', 'brokers').split(',')]
except:
    raise Exception('Could not load brokers from configuration')

port = config_get_int('trace', 'port')
topic = config_get('trace', 'topic')
username = config_get('trace', 'username')
password = config_get('trace', 'password')

logging.getLogger("stomp").setLevel(logging.CRITICAL)

# Resolve every alias to its A records and collect all addresses in one flat list.
brokers_resolved = []
for broker in brokers_alias:
    brokers_resolved.extend([str(record) for record in dns.resolver.query(broker, 'A')])

# One STOMP connection per resolved broker address.
conns = [stomp.Connection(host_and_ports=[(address, port)], reconnect_attempts_max=3)
         for address in brokers_resolved]
def __init__(self):
    """
    Create the Redis client and load the key prefix.

    Host, port and prefix are all read from the ``c3po-network-metrics``
    configuration section.
    """
    section = 'c3po-network-metrics'
    redis_host = config_get(section, 'redis_host')
    redis_port = config_get_int(section, 'redis_port')
    self._r = StrictRedis(host=redis_host, port=redis_port)
    self._prefix = config_get(section, 'prefix')
def _log_delivered_message(message, heartbeat):
    '''
    Write the per-message debug line for a message that was handed to a broker.

    :param message: Message dictionary with 'event_type', 'payload' and 'created_at'.
    :param heartbeat: Dictionary providing 'assign_thread' and 'nr_threads' for the log prefix.
    '''
    event_type = str(message['event_type']).lower()
    payload = message['payload']
    assign_thread = heartbeat['assign_thread']
    nr_threads = heartbeat['nr_threads']

    if event_type.startswith('transfer') or event_type.startswith('stagein'):
        logging.debug('[broker] %i:%i - event_type: %s, scope: %s, name: %s, rse: %s, request-id: %s, transfer-id: %s, created_at: %s',
                      assign_thread, nr_threads, event_type,
                      payload.get('scope', None),
                      payload.get('name', None),
                      payload.get('dst-rse', None),
                      payload.get('request-id', None),
                      payload.get('transfer-id', None),
                      str(message['created_at']))
    elif event_type.startswith('dataset'):
        logging.debug('[broker] %i:%i - event_type: %s, scope: %s, name: %s, rse: %s, rule-id: %s, created_at: %s)',
                      assign_thread, nr_threads, event_type,
                      payload['scope'],
                      payload['name'],
                      payload['rse'],
                      payload['rule_id'],
                      str(message['created_at']))
    elif event_type.startswith('deletion'):
        if 'url' not in payload:
            payload['url'] = 'unknown'
        logging.debug('[broker] %i:%i - event_type: %s, scope: %s, name: %s, rse: %s, url: %s, created_at: %s)',
                      assign_thread, nr_threads, event_type,
                      payload['scope'],
                      payload['name'],
                      payload['rse'],
                      payload['url'],
                      str(message['created_at']))
    else:
        logging.debug('[broker] %i:%i - other message: %s', assign_thread, nr_threads, message)


def deliver_messages(once=False, brokers_resolved=None, thread=0, bulk=1000, delay=10, broker_timeout=3, broker_retry=3):
    '''
    Main loop to deliver messages to a broker.

    Sets up one STOMP connection per broker, then repeatedly retrieves a bulk of
    messages, sends each one through a randomly chosen connection (connecting it first
    if needed) and deletes the messages that were sent or that cannot be serialized.

    :param once: Stop after a single pass instead of looping until GRACEFUL_STOP is set.
    :param brokers_resolved: List of broker addresses; the function returns immediately if empty or None.
    :param thread: Only used for log output before the first heartbeat is available.
    :param bulk: Maximum number of messages requested per pass.
    :param delay: Target duration of one pass in seconds; the remainder is slept.
    :param broker_timeout: Connection timeout; a falsy value is turned into None.
    :param broker_retry: Accepted for interface compatibility; not used by this function.
    '''
    logging.info('[broker] starting - threads (%i) bulk (%i)', thread, bulk)

    if not brokers_resolved:
        logging.fatal('No brokers resolved.')
        return

    if not broker_timeout:  # Allow zero in config
        broker_timeout = None

    logging.info('[broker] checking authentication method')
    use_ssl = True
    try:
        use_ssl = config_get_bool('messaging-hermes', 'use_ssl')
    except Exception:
        logging.info('[broker] could not find use_ssl in configuration -- please update your rucio.cfg')

    port = config_get_int('messaging-hermes', 'port')
    vhost = config_get('messaging-hermes', 'broker_virtual_host', raise_exception=False)
    username = password = None
    if not use_ssl:
        username = config_get('messaging-hermes', 'username')
        password = config_get('messaging-hermes', 'password')
        port = config_get_int('messaging-hermes', 'nonssl_port')

    conns = []
    for broker in brokers_resolved:
        if not use_ssl:
            logging.info('[broker] setting up username/password authentication: %s' % broker)
            con = stomp.Connection12(host_and_ports=[(broker, port)],
                                     vhost=vhost,
                                     keepalive=True,
                                     timeout=broker_timeout)
        else:
            logging.info('[broker] setting up ssl cert/key authentication: %s' % broker)
            con = stomp.Connection12(host_and_ports=[(broker, port)],
                                     use_ssl=True,
                                     ssl_key_file=config_get('messaging-hermes', 'ssl_key_file'),
                                     ssl_cert_file=config_get('messaging-hermes', 'ssl_cert_file'),
                                     vhost=vhost,
                                     keepalive=True,
                                     timeout=broker_timeout)

        con.set_listener('rucio-hermes', HermesListener(con.transport._Transport__host_and_ports[0]))
        conns.append(con)

    destination = config_get('messaging-hermes', 'destination')

    executable = 'hermes [broker]'
    hostname = socket.getfqdn()
    pid = os.getpid()
    heartbeat_thread = threading.current_thread()

    # Make an initial heartbeat so that all daemons have the correct worker number on the next try
    sanity_check(executable=executable, hostname=hostname, pid=pid, thread=heartbeat_thread)
    GRACEFUL_STOP.wait(1)

    # Placeholder used only for log prefixes until live() has returned once; without it
    # the log calls below fail with NameError when the loop body never completes a
    # heartbeat (stop requested during the wait above, or live() raising).
    heartbeat = {'assign_thread': thread, 'nr_threads': 1}

    while not GRACEFUL_STOP.is_set():
        t_start = time.time()
        try:
            heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=heartbeat_thread)

            logging.debug('[broker] %i:%i - using: %s',
                          heartbeat['assign_thread'],
                          heartbeat['nr_threads'],
                          [conn.transport._Transport__host_and_ports[0][0] for conn in conns])

            messages = retrieve_messages(bulk=bulk,
                                         thread=heartbeat['assign_thread'],
                                         total_threads=heartbeat['nr_threads'])

            if messages:
                logging.debug('[broker] %i:%i - retrieved %i messages',
                              heartbeat['assign_thread'],
                              heartbeat['nr_threads'],
                              len(messages))
                to_delete = []
                for message in messages:
                    try:
                        conn = random.choice(conns)
                        if not conn.is_connected():
                            host_and_ports = conn.transport._Transport__host_and_ports[0][0]
                            record_counter('daemons.hermes.reconnect.%s' % host_and_ports.split('.')[0])
                            conn.start()
                            if not use_ssl:
                                logging.info('[broker] %i:%i - connecting with USERPASS to %s',
                                             heartbeat['assign_thread'],
                                             heartbeat['nr_threads'],
                                             host_and_ports)
                                conn.connect(username, password, wait=True)
                            else:
                                logging.info('[broker] %i:%i - connecting with SSL to %s',
                                             heartbeat['assign_thread'],
                                             heartbeat['nr_threads'],
                                             host_and_ports)
                                conn.connect(wait=True)

                        conn.send(body=json.dumps({'event_type': str(message['event_type']).lower(),
                                                   'payload': message['payload'],
                                                   'created_at': str(message['created_at'])}),
                                  destination=destination,
                                  headers={'persistent': 'true',
                                           'event_type': str(message['event_type']).lower()})

                        to_delete.append({'id': message['id'],
                                          'created_at': message['created_at'],
                                          'updated_at': message['created_at'],
                                          'payload': json.dumps(message['payload']),
                                          'event_type': message['event_type']})
                    except ValueError:
                        # An unserializable message can never be delivered: drop it from the queue.
                        logging.warning('Cannot serialize payload to JSON: %s', str(message['payload']))
                        to_delete.append({'id': message['id'],
                                          'created_at': message['created_at'],
                                          'updated_at': message['created_at'],
                                          'payload': str(message['payload']),
                                          'event_type': message['event_type']})
                        continue
                    except stomp.exception.NotConnectedException as error:
                        logging.warning('Could not deliver message due to NotConnectedException: %s', str(error))
                        continue
                    except stomp.exception.ConnectFailedException as error:
                        logging.warning('Could not deliver message due to ConnectFailedException: %s', str(error))
                        continue
                    except Exception as error:
                        logging.warning('Could not deliver message: %s', str(error))
                        logging.critical(traceback.format_exc())
                        continue

                    _log_delivered_message(message, heartbeat)

                delete_messages(to_delete)
                logging.info('[broker] %i:%i - submitted %i messages',
                             heartbeat['assign_thread'],
                             heartbeat['nr_threads'],
                             len(to_delete))

            if once:
                break

        except NoResultFound:
            # silence this error: https://its.cern.ch/jira/browse/RUCIO-1699
            pass
        except Exception:
            logging.critical(traceback.format_exc())

        # Sleep whatever is left of the ``delay`` budget for this pass.
        t_delay = max(delay - (time.time() - t_start), 0)
        if t_delay:
            logging.debug('[broker] %i:%i - sleeping %s seconds',
                          heartbeat['assign_thread'],
                          heartbeat['nr_threads'],
                          t_delay)
        time.sleep(t_delay)

    for conn in conns:
        try:
            conn.disconnect()
        except Exception:
            # Best effort: a broker that cannot be disconnected must not block shutdown.
            pass

    logging.debug('[broker] %i:%i - graceful stop requested', heartbeat['assign_thread'], heartbeat['nr_threads'])
    die(executable, hostname, pid, heartbeat_thread)
    logging.debug('[broker] %i:%i - graceful stop done', heartbeat['assign_thread'], heartbeat['nr_threads'])
def request_transfer(loop=1, src=None, dst=None, upload=False, same_src=False, same_dst=False):
    """
    Main loop to request a new transfer.

    Each iteration creates a dataset with one file replica on a source RSE and adds a
    replication rule for the dataset on a destination RSE. The iteration is committed on
    success and rolled back (and logged) on any error.

    :param loop: Number of iterations after which the function returns.
    :param src: Passed to ``generate_rse`` for the source RSE.
    :param dst: Passed to ``generate_rse`` for the destination RSE.
    :param upload: If true, the configured test file is uploaded to the source PFN via an 'srm' write protocol.
    :param same_src: If true, the source RSE generated before the loop is reused for every iteration.
    :param same_dst: If true, the destination RSE generated before the loop is reused for every iteration.
    """

    def _new_rse(endpoint):
        # Second argument: 8 characters sampled from the upper-cased ASCII letters.
        return generate_rse(endpoint, ''.join(random.sample(string.ascii_letters.upper(), 8)))

    logging.info('request: starting')

    session = get_session()
    src_rse = _new_rse(src)
    dst_rse = _new_rse(dst)

    logging.info('request: started')

    i = 0
    while not graceful_stop.is_set():

        if i >= loop:
            return

        try:
            if not same_src:
                src_rse = _new_rse(src)
            if not same_dst:
                dst_rse = _new_rse(dst)

            tmp_name = generate_uuid()
            dataset_name = 'dataset-%s' % tmp_name
            file_name = 'file-%s' % tmp_name

            # add a new dataset
            scope = InternalScope('mock')
            account = InternalAccount('root')
            did.add_did(scope=scope, name=dataset_name, type=DIDType.DATASET, account=account, session=session)

            # construct PFN
            pfn = rsemanager.lfns2pfns(src_rse, lfns=[{'scope': scope.external,
                                                       'name': file_name}])['%s:file-%s' % (scope.external, tmp_name)]

            if upload:
                # create the directories if needed
                p = rsemanager.create_protocol(src_rse, operation='write', scheme='srm')
                p.connect()
                try:
                    p.mkdir(pfn)
                except Exception:
                    # Best effort: failures of mkdir are ignored and the upload is attempted anyway.
                    pass

                # upload the test file
                try:
                    fp = os.path.dirname(config_get('injector', 'file'))
                    fn = os.path.basename(config_get('injector', 'file'))
                    p.put(fn, pfn, source_dir=fp)
                except Exception:
                    logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                    did.delete_dids([{'scope': scope, 'name': dataset_name}], account=account, session=session)
                    break

            # Read the size once as an integer so the replica and the attachment agree on it.
            file_bytes = config_get_int('injector', 'bytes')

            # add the replica
            replica.add_replica(rse_id=src_rse['id'],
                                scope=scope,
                                name=file_name,
                                bytes=file_bytes,
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account=account,
                                session=session)
            logging.info('added replica on %s for DID mock:%s' % (src_rse['rse'], tmp_name))

            # to the dataset
            did.attach_dids(scope=scope,
                            name=dataset_name,
                            dids=[{'scope': scope,
                                   'name': file_name,
                                   'bytes': file_bytes}],
                            account=account,
                            session=session)

            # add rule for the dataset
            rule.add_rule(dids=[{'scope': scope, 'name': dataset_name}],
                          account=account,
                          copies=1,
                          rse_expression=dst_rse['rse'],
                          grouping='ALL',
                          weight=None,
                          lifetime=None,
                          locked=False,
                          subscription_id=None,
                          activity='mock-injector',
                          session=session)
            logging.info('added rule for %s for DID %s:%s' % (dst_rse['rse'], scope, tmp_name))

            session.commit()
        except Exception:
            session.rollback()
            logging.critical(traceback.format_exc())

        i += 1

    logging.info('request: graceful stop requested')

    logging.info('request: graceful stop done')
def receiver(id_, total_threads=1, full_mode=False, all_vos=False):
    """
    Main loop to consume messages from the FTS3 producer.

    Resolves the configured broker aliases, prepares one STOMP connection per address
    and keeps every connection connected and subscribed until ``graceful_stop`` is set.

    :param id_: Passed to every ``Receiver`` listener.
    :param total_threads: Passed to every ``Receiver`` listener.
    :param full_mode: Passed to every ``Receiver`` listener.
    :param all_vos: Passed to every ``Receiver`` listener.
    """
    logging.info('receiver starting in full mode: %s' % full_mode)

    logger_prefix = executable = 'conveyor-receiver'

    brokers_alias = []
    brokers_resolved = []
    try:
        configured_brokers = config_get('messaging-fts3', 'brokers')
        brokers_alias = [alias.strip() for alias in configured_brokers.split(',')]
    except Exception:
        raise Exception('Could not load brokers from configuration')

    logging.info('resolving broker dns alias: %s' % brokers_alias)

    # Collect the IPv4 address of every alias into one flat list.
    brokers_resolved = []
    for alias in brokers_alias:
        for addrinfo in socket.getaddrinfo(alias, 0, socket.AF_INET, 0, socket.IPPROTO_TCP):
            brokers_resolved.append(addrinfo[4][0])

    logging.info('brokers resolved to %s', brokers_resolved)

    logging.info('checking authentication method')
    use_ssl = True
    try:
        use_ssl = config_get_bool('messaging-fts3', 'use_ssl')
    except:
        logging.info('could not find use_ssl in configuration -- please update your rucio.cfg')

    port = config_get_int('messaging-fts3', 'port')
    vhost = config_get('messaging-fts3', 'broker_virtual_host', raise_exception=False)
    if not use_ssl:
        username = config_get('messaging-fts3', 'username')
        password = config_get('messaging-fts3', 'password')
        port = config_get_int('messaging-fts3', 'nonssl_port')

    conns = []
    for address in brokers_resolved:
        endpoint = [(address, port)]
        if use_ssl:
            logging.info('setting up ssl cert/key authentication: %s' % address)
            connection = stomp.Connection12(host_and_ports=endpoint,
                                            use_ssl=True,
                                            ssl_key_file=config_get('messaging-fts3', 'ssl_key_file'),
                                            ssl_cert_file=config_get('messaging-fts3', 'ssl_cert_file'),
                                            vhost=vhost,
                                            reconnect_attempts_max=999)
        else:
            logging.info('setting up username/password authentication: %s' % address)
            connection = stomp.Connection12(host_and_ports=endpoint,
                                            use_ssl=False,
                                            vhost=vhost,
                                            reconnect_attempts_max=999)
        conns.append(connection)

    logging.info('receiver started')

    with HeartbeatHandler(executable=executable, renewal_interval=30, logger_prefix=logger_prefix) as heartbeat_handler:

        while not graceful_stop.is_set():

            _, _, logger = heartbeat_handler.live()

            for conn in conns:
                if conn.is_connected():
                    continue

                broker_addr = conn.transport._Transport__host_and_ports[0]
                logger(logging.INFO, 'connecting to %s' % broker_addr[0])
                record_counter('daemons.messaging.fts3.reconnect.{host}',
                               labels={'host': broker_addr[0].split('.')[0]})

                listener = Receiver(broker=broker_addr,
                                    id_=id_,
                                    total_threads=total_threads,
                                    full_mode=full_mode,
                                    all_vos=all_vos)
                conn.set_listener('rucio-messaging-fts3', listener)

                if use_ssl:
                    conn.connect(wait=True)
                else:
                    conn.connect(username, password, wait=True)
                conn.subscribe(destination=config_get('messaging-fts3', 'destination'),
                               id='rucio-messaging-fts3',
                               ack='auto')
            time.sleep(1)

        # Shutdown: disconnecting is best effort.
        for conn in conns:
            try:
                conn.disconnect()
            except Exception:
                pass
def setup_activemq(logger):
    """
    Deliver messages to ActiveMQ

    Resolves the configured broker aliases and prepares one STOMP connection (with a
    ``HermesListener`` attached) per resolved address.

    :param logger: The logger object.
    :returns: Tuple ``(conns, destination, username, password, use_ssl)``. ``username`` and
              ``password`` are ``None`` when SSL authentication is used. If no broker could
              be resolved, a tuple of five ``None`` values is returned.
    :raises Exception: If the broker list cannot be loaded from the configuration.
    """
    logger(logging.INFO, '[broker] Resolving brokers')

    brokers_alias = []
    try:
        brokers_alias = [broker.strip() for broker in config_get('messaging-hermes', 'brokers').split(',')]
    except Exception:
        raise Exception('Could not load brokers from configuration')

    logger(logging.INFO, '[broker] Resolving broker dns alias: %s', brokers_alias)
    brokers_resolved = []
    for broker in brokers_alias:
        try:
            addrinfos = socket.getaddrinfo(broker, 0, socket.AF_INET, 0, socket.IPPROTO_TCP)
            brokers_resolved.extend(ai[4][0] for ai in addrinfos)
        except socket.gaierror as ex:
            # An unresolvable alias is reported and skipped; the others are still used.
            logger(logging.ERROR, '[broker] Cannot resolve domain name %s (%s)', broker, str(ex))

    logger(logging.DEBUG, '[broker] Brokers resolved to %s', brokers_resolved)

    if not brokers_resolved:
        logger(logging.FATAL, '[broker] No brokers resolved.')
        return None, None, None, None, None

    broker_timeout = 3

    logger(logging.INFO, '[broker] Checking authentication method')
    use_ssl = True
    try:
        use_ssl = config_get_bool('messaging-hermes', 'use_ssl')
    except Exception:
        logger(logging.INFO, '[broker] Could not find use_ssl in configuration -- please update your rucio.cfg')

    port = config_get_int('messaging-hermes', 'port')
    vhost = config_get('messaging-hermes', 'broker_virtual_host', raise_exception=False)

    # Credentials are only configured for non-SSL setups. They must nevertheless be
    # bound in the SSL case, because they are part of the returned tuple.
    username = None
    password = None
    if not use_ssl:
        username = config_get('messaging-hermes', 'username')
        password = config_get('messaging-hermes', 'password')
        port = config_get_int('messaging-hermes', 'nonssl_port')

    conns = []
    for broker in brokers_resolved:
        if not use_ssl:
            logger(logging.INFO, '[broker] setting up username/password authentication: %s', broker)
            con = stomp.Connection12(host_and_ports=[(broker, port)],
                                     vhost=vhost,
                                     keepalive=True,
                                     timeout=broker_timeout)
        else:
            logger(logging.INFO, '[broker] setting up ssl cert/key authentication: %s', broker)
            con = stomp.Connection12(host_and_ports=[(broker, port)],
                                     use_ssl=True,
                                     ssl_key_file=config_get('messaging-hermes', 'ssl_key_file'),
                                     ssl_cert_file=config_get('messaging-hermes', 'ssl_cert_file'),
                                     vhost=vhost,
                                     keepalive=True,
                                     timeout=broker_timeout)

        con.set_listener('rucio-hermes', HermesListener(con.transport._Transport__host_and_ports[0]))
        conns.append(con)

    destination = config_get('messaging-hermes', 'destination')
    return conns, destination, username, password, use_ssl
def consumer(id_, num_thread=1):
    """
    Main loop to consume messages from the Rucio Cache producer.

    Builds the broker connections through ``get_stomp_brokers`` and keeps each of them
    connected, with an ``AMQConsumer`` listener and a subscription to the configured
    destination, until ``GRACEFUL_STOP`` is set.

    :param id_: Not used by this function.
    :param num_thread: Not used by this function.
    """
    section = 'messaging-cache'
    prepend_str = 'cache-consumer '
    logger = formatted_logger(logging.log, prepend_str + '%s')

    logger(logging.INFO, 'Rucio Cache consumer starting')

    try:
        configured_brokers = config_get(section, 'brokers')
        brokers_alias = [alias.strip() for alias in configured_brokers.split(',')]
    except:
        raise Exception('Could not load rucio cache brokers from configuration')

    # SSL stays enabled when the option cannot be read.
    use_ssl = True
    try:
        use_ssl = config_get_bool(section, 'use_ssl')
    except Exception:
        pass

    if not use_ssl:
        username = config_get(section, 'username')
        password = config_get(section, 'password')

    destination = config_get(section, 'destination')
    subscription_id = 'rucio-cache-messaging'

    broker_settings = {}
    broker_settings['vhost'] = config_get(section, 'broker_virtual_host', raise_exception=False)
    broker_settings['port'] = config_get_int(section, 'port')
    broker_settings['reconnect_attempts'] = config_get_int(section, 'reconnect_attempts', default=100)
    broker_settings['ssl_key_file'] = config_get(section, 'ssl_key_file', raise_exception=False)
    broker_settings['ssl_cert_file'] = config_get(section, 'ssl_cert_file', raise_exception=False)

    conns = get_stomp_brokers(brokers=brokers_alias,
                              use_ssl=use_ssl,
                              timeout=None,
                              logger=logger,
                              **broker_settings)

    logger(logging.INFO, 'consumer started')

    while not GRACEFUL_STOP.is_set():
        for conn in conns:
            if conn.is_connected():
                continue

            host_port = conn.transport._Transport__host_and_ports[0]
            logger(logging.INFO, 'connecting to %s' % host_port[0])
            record_counter('daemons.messaging.cache.reconnect.{host}', labels={'host': host_port[0]})

            listener = AMQConsumer(broker=host_port, conn=conn, logger=logger)
            conn.set_listener('rucio-cache-consumer', listener)

            if use_ssl:
                conn.connect()
            else:
                conn.connect(username, password)

            conn.subscribe(destination=destination, ack='auto', id=subscription_id)
        time.sleep(1)

    logger(logging.INFO, 'graceful stop requested')

    # Disconnecting is best effort.
    for conn in conns:
        try:
            conn.disconnect()
        except:
            pass

    logger(logging.INFO, 'graceful stop done')
def __assign_paths_to_transfertool_and_create_hops(
        candidate_paths_by_request_id: "Dict[str, List[List[DirectTransferDefinition]]]",
        transfertool_classes: "Optional[List[Type[Transfertool]]]" = None,
        logger: "Callable" = logging.log,
        session: "Optional[Session]" = None,
) -> "Tuple[Dict[TransferToolBuilder, List[List[DirectTransferDefinition]]], Set[str]]":
    """
    For each request, pick the first path which can be submitted by one of the transfertools.
    If the chosen path is multihop, create all missing intermediate requests and replicas.

    :param candidate_paths_by_request_id: Candidate paths per request id. Each path is a sequence of
                                          hops; the last hop carries the initial request.
    :param transfertool_classes: Transfertool classes passed to ``__assign_to_transfertool``.
                                 NOTE(review): the default is None, but the "None of the transfertools"
                                 log message iterates over this value -- confirm callers always pass a list.
    :param logger: Callable invoked as ``logger(level, message, *args)``.
    :param session: Database session forwarded to the helpers which touch the database.
    :returns: Tuple of (hops to submit, grouped by the transfertool builder which will submit them;
              ids of the requests for which no path could be selected).
    """
    reqs_no_host = set()
    paths_by_transfertool_builder = {}
    default_tombstone_delay = config_get_int('transfers', 'multihop_tombstone_delay', default=transfer_core.DEFAULT_MULTIHOP_TOMBSTONE_DELAY, expiration_time=600)
    for request_id, candidate_paths in candidate_paths_by_request_id.items():
        # Get the rws object from any candidate path. It is the same for all candidate paths. For multihop, the initial request is the last hop
        rws = candidate_paths[0][-1].rws

        # Selects the first path which can be submitted using a chain of supported transfertools
        # and for which the creation of intermediate hops (if it is a multihop) works correctly
        best_path = None
        builder_to_use = None
        hops_to_submit = []
        must_skip_submission = False

        tt_assignments = [(transfer_path, __assign_to_transfertool(transfer_path, transfertool_classes, logger=logger))
                          for transfer_path in candidate_paths]
        # Prioritize the paths which need less transfertool transitions.
        # Ideally, the entire path should be submitted to a single transfertool
        # (paths with an empty assignment sort first and are skipped with a log message)
        for transfer_path, tt_assignment in sorted(tt_assignments, key=lambda t: len(t[1])):
            if not tt_assignment:
                logger(logging.INFO, '%s: None of the transfertools can submit the request: %s', request_id, [c.__name__ for c in transfertool_classes])
                continue

            # Set the 'transfertool' field on the intermediate hops which should be created in the database
            for sub_path, tt_builder in tt_assignment:
                if tt_builder:
                    for hop in sub_path:
                        # The last hop is the initial request itself: it is left untouched
                        if hop is not transfer_path[-1]:
                            hop.rws.transfertool = tt_builder.transfertool_class.external_name
            created, must_skip_submission = __create_missing_replicas_and_requests(
                transfer_path, default_tombstone_delay, logger=logger, session=session
            )
            if created:
                best_path = transfer_path
                # Only the first sub-path will be submitted to the corresponding transfertool,
                # the rest of the hops will wait for first hops to be transferred
                hops_to_submit, builder_to_use = tt_assignment[0]
            # Stop at the first path which was either created or which must be skipped
            if created or must_skip_submission:
                break

        if not best_path:
            reqs_no_host.add(request_id)
            logger(logging.INFO, '%s: Cannot pick transfertool, or create intermediate requests' % request_id)
            continue

        transfer_core.ensure_db_sources(best_path, logger=logger, session=session)

        if len(best_path) > 1:
            logger(logging.INFO, '%s: Best path is multihop: %s' % (rws.request_id, transfer_core.transfer_path_str(best_path)))
        elif best_path is not candidate_paths[0] or len(best_path[0].sources) > 1:
            # Only print singlehop if it brings additional information:
            # - either it's not the first candidate path
            # - or it's a multi-source
            # in other cases, it doesn't bring any additional information to what is known from previous logs
            logger(logging.INFO, '%s: Best path is direct: %s' % (rws.request_id, transfer_core.transfer_path_str(best_path)))

        if must_skip_submission:
            logger(logging.INFO, '%s: Part of the transfer is already being handled. Skip for now.' % request_id)
            continue

        if len(hops_to_submit) < len(best_path):
            logger(logging.INFO, '%s: Only first %d hops will be submitted by %s', request_id, len(hops_to_submit), builder_to_use)

        paths_by_transfertool_builder.setdefault(builder_to_use, []).append(hops_to_submit)
    return paths_by_transfertool_builder, reqs_no_host
def deliver_messages(once=False, brokers_resolved=None, process=0, total_processes=1, thread=0, total_threads=1, bulk=1000):
    """
    Main loop to deliver messages to a broker.

    Creates one SSL STOMP connection per broker, then, until ``graceful_stop`` is set,
    reconnects dropped connections, retrieves a bulk of messages, sends each one through
    a randomly chosen connection and deletes the messages that were sent.

    :param once: Accepted for interface compatibility; not used by this function.
    :param brokers_resolved: Iterable of broker addresses; ``None`` is treated as no brokers.
    :param process: Forwarded to ``retrieve_messages`` and used in log output.
    :param total_processes: Forwarded to ``retrieve_messages`` and used in log output.
    :param thread: Forwarded to ``retrieve_messages`` and used in log output.
    :param total_threads: Forwarded to ``retrieve_messages`` and used in log output.
    :param bulk: Maximum number of messages requested per pass.
    """
    logging.info('hermes starting - process (%i/%i) thread (%i/%i) bulk (%i)' % (process, total_processes,
                                                                                  thread, total_threads,
                                                                                  bulk))
    conns = []
    for broker in (brokers_resolved or []):
        conns.append(stomp.Connection(host_and_ports=[(broker, config_get_int('messaging-hermes', 'port'))],
                                      use_ssl=True,
                                      ssl_key_file=config_get('messaging-hermes', 'ssl_key_file'),
                                      ssl_cert_file=config_get('messaging-hermes', 'ssl_cert_file'),
                                      ssl_version=ssl.PROTOCOL_TLSv1))

    # The destination does not change between messages, so read it once.
    destination = config_get('messaging-hermes', 'destination')

    logging.info('hermes started - process (%i/%i) thread (%i/%i) bulk (%i)' % (process, total_processes,
                                                                                 thread, total_threads,
                                                                                 bulk))

    while not graceful_stop.is_set():

        try:
            for conn in conns:
                if not conn.is_connected():
                    host = conn.transport._Transport__host_and_ports[0][0]
                    logging.info('connecting to %s' % host)
                    record_counter('daemons.hermes.reconnect.%s' % host.split('.')[0])
                    conn.start()
                    conn.connect()

            tmp = retrieve_messages(bulk=bulk,
                                    process=process,
                                    total_processes=total_processes,
                                    thread=thread,
                                    total_threads=total_threads)
            if not tmp:
                time.sleep(1)
            else:
                to_delete = []
                for t in tmp:
                    event_type = str(t['event_type']).lower()
                    try:
                        random.choice(conns).send(body=json.dumps({'event_type': event_type,
                                                                   'payload': t['payload'],
                                                                   'created_at': str(t['created_at'])}),
                                                  destination=destination)
                    except ValueError:
                        logging.warning('Cannot serialize payload to JSON: %s' % str(t['payload']))
                        continue
                    except Exception as e:
                        # A message that could not be sent stays queued and is retried on a later pass.
                        logging.warning('Could not deliver message: %s' % str(e))
                        continue

                    to_delete.append(t['id'])

                    if event_type.startswith("transfer"):
                        logging.debug('%i:%i - event_type: %s, scope: %s, name: %s, rse: %s, request-id: %s, transfer-id: %s, created_at: %s' % (process,
                                                                                                                                                  thread,
                                                                                                                                                  event_type,
                                                                                                                                                  t['payload']['scope'],
                                                                                                                                                  t['payload']['name'],
                                                                                                                                                  t['payload']['dst-rse'],
                                                                                                                                                  t['payload']['request-id'],
                                                                                                                                                  t['payload']['transfer-id'],
                                                                                                                                                  str(t['created_at'])))
                    elif event_type.startswith("dataset"):
                        logging.debug('%i:%i - event_type: %s, scope: %s, name: %s, rse: %s, rule-id: %s, created_at: %s)' % (process,
                                                                                                                              thread,
                                                                                                                              event_type,
                                                                                                                              t['payload']['scope'],
                                                                                                                              t['payload']['name'],
                                                                                                                              t['payload']['rse'],
                                                                                                                              t['payload']['rule_id'],
                                                                                                                              str(t['created_at'])))
                    elif event_type.startswith("deletion"):
                        if 'url' not in t['payload']:
                            t['payload']['url'] = 'unknown'
                        logging.debug('%i:%i - event_type: %s, scope: %s, name: %s, rse: %s, url: %s, created_at: %s)' % (process,
                                                                                                                          thread,
                                                                                                                          event_type,
                                                                                                                          t['payload']['scope'],
                                                                                                                          t['payload']['name'],
                                                                                                                          t['payload']['rse'],
                                                                                                                          t['payload']['url'],
                                                                                                                          str(t['created_at'])))
                    else:
                        logging.debug('%i:%i -other message: %s' % (process, thread, t))

                delete_messages(to_delete)
        except Exception:
            logging.critical(traceback.format_exc())

    logging.debug('%i:%i - graceful stop requests' % (process, thread))

    for conn in conns:
        try:
            conn.disconnect()
        except Exception:
            # Best effort: a broker that cannot be disconnected must not block shutdown.
            pass

    logging.debug('%i:%i - graceful stop done' % (process, thread))
HANDLER.setFormatter(LOGFORMATTER) HANDLER.suffix = "%Y-%m-%d" LOGGER.addHandler(HANDLER) except: if 'sphinx' not in sys.modules: raise BROKERS_ALIAS, BROKERS_RESOLVED = [], [] try: BROKERS_ALIAS = [b.strip() for b in config_get('nongrid-trace', 'brokers').split(',')] except: if 'sphinx' not in sys.modules: raise Exception('Could not load brokers from configuration') try: PORT = config_get_int('nongrid-trace', 'port') TOPIC = config_get('nongrid-trace', 'topic') USERNAME = config_get('nongrid-trace', 'username') PASSWORD = config_get('nongrid-trace', 'password') except: if 'sphinx' not in sys.modules: raise logging.getLogger("stomp").setLevel(logging.CRITICAL) for broker in BROKERS_ALIAS: try: BROKERS_RESOLVED.append([str(tmp_broker) for tmp_broker in dns.resolver.query(broker, 'A')]) BROKERS_RESOLVED = [item for sublist in BROKERS_RESOLVED for item in sublist] except: pass
def stager(once=False, rses=None, bulk=100, group_bulk=1, group_policy='rule', source_strategy=None, activities=None, sleep_time=600):
    """
    Main loop to submit a new transfer primitive to a transfertool.

    Reads the optional settings of the ``conveyor`` configuration section, builds the
    keyword arguments for the FTS3 transfertool and delegates the actual looping to
    ``run_daemon`` with a pre-configured ``run_once``.

    :param once: Forwarded to ``run_daemon``.
    :param rses: Optional list of RSE dictionaries; only their ``'id'`` entries are forwarded.
    :param bulk: Forwarded to ``run_once``.
    :param group_bulk: Forwarded to ``run_once`` and to the FTS3 transfertool configuration.
    :param group_policy: ``group_policy`` used for the FTS3 transfertool.
    :param source_strategy: ``source_strategy`` used for the FTS3 transfertool.
    :param activities: Optional list of activities; a sorted copy is forwarded to ``run_daemon``.
    :param sleep_time: Forwarded to ``run_daemon``.
    """

    def _conveyor_option(option, getter=config_get, default=None):
        # Every setting read here is optional: a missing option yields ``default``.
        try:
            return getter('conveyor', option)
        except NoOptionError:
            return default

    scheme = _conveyor_option('scheme')
    failover_scheme = _conveyor_option('failover_scheme')
    bring_online = _conveyor_option('bring_online', getter=config_get_int, default=43200)

    # Expected format: "<activity>:<int>,<activity>:<int>,..."
    max_time_in_queue = {}
    timelife_conf = _conveyor_option('max_time_in_queue')
    if timelife_conf is not None:
        for conf in timelife_conf.split(","):
            act, timelife = conf.split(":")
            max_time_in_queue[act.strip()] = int(timelife.strip())
    max_time_in_queue.setdefault('default', 168)
    logging.debug("Maximum time in queue for different activities: %s" % max_time_in_queue)

    logger_prefix = executable = 'conveyor-stager'
    if activities:
        # Sort a copy: the caller's list must not be reordered as a side effect.
        activities = sorted(activities)
        # NOTE(review): there is no separator before '--activities'. The string is kept
        # byte-identical because it is handed to run_daemon as the daemon's executable
        # name -- confirm nothing relies on the exact value before changing it.
        executable += '--activities ' + str(activities)

    rse_ids = [rse['id'] for rse in rses] if rses else None

    transfertool_kwargs = {
        FTS3Transfertool: {
            'group_policy': group_policy,
            'group_bulk': group_bulk,
            'source_strategy': source_strategy,
            'max_time_in_queue': max_time_in_queue,
            'bring_online': bring_online,
            'default_lifetime': -1,
        }
    }

    run_daemon(
        once=once,
        graceful_stop=graceful_stop,
        executable=executable,
        logger_prefix=logger_prefix,
        partition_wait_time=None,
        sleep_time=sleep_time,
        run_once_fnc=functools.partial(
            run_once,
            bulk=bulk,
            group_bulk=group_bulk,
            scheme=scheme,
            failover_scheme=failover_scheme,
            rse_ids=rse_ids,
            transfertool_kwargs=transfertool_kwargs,
        ),
        activities=activities,
    )
def receiver(id, total_threads=1, full_mode=False):
    """
    Main loop to consume messages from the FTS3 producer.

    Resolves the configured brokers to IPv4 addresses, creates one STOMP
    connection per address and keeps (re)connecting and subscribing them
    until ``graceful_stop`` is set. A heartbeat is sent on every iteration.

    :param id:            Forwarded to the ``Receiver`` listener.
    :param total_threads: Forwarded to the ``Receiver`` listener.
    :param full_mode:     Forwarded to the ``Receiver`` listener.
    :raises Exception:    If the broker list cannot be loaded from the configuration.
    """

    logging.info('receiver starting in full mode: %s' % full_mode)

    executable = ' '.join(sys.argv)
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()

    heartbeat.sanity_check(executable=executable, hostname=hostname)
    # Make an initial heartbeat so that all finishers have the correct worker number on the next try
    heartbeat.live(executable, hostname, pid, hb_thread)

    try:
        brokers_alias = [b.strip() for b in config_get('messaging-fts3', 'brokers').split(',')]
    except Exception:
        raise Exception('Could not load brokers from configuration')

    logging.info('resolving broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        addrinfos = socket.getaddrinfo(broker, 0, socket.AF_INET, 0, socket.IPPROTO_TCP)
        # Element 4 of an addrinfo tuple is the socket address; its first item is the IP.
        brokers_resolved.extend(ai[4][0] for ai in addrinfos)

    logging.info('brokers resolved to %s', brokers_resolved)

    logging.info('checking authentication method')
    use_ssl = True
    try:
        use_ssl = config_get_bool('messaging-fts3', 'use_ssl')
    except Exception:
        # Only configuration problems should fall back to SSL; a bare except would
        # also swallow SystemExit and KeyboardInterrupt.
        logging.info('could not find use_ssl in configuration -- please update your rucio.cfg')

    port = config_get_int('messaging-fts3', 'port')
    vhost = config_get('messaging-fts3', 'broker_virtual_host', raise_exception=False)
    if not use_ssl:
        username = config_get('messaging-fts3', 'username')
        password = config_get('messaging-fts3', 'password')
        port = config_get_int('messaging-fts3', 'nonssl_port')

    conns = []
    for broker in brokers_resolved:
        if not use_ssl:
            logging.info('setting up username/password authentication: %s' % broker)
            con = stomp.Connection12(host_and_ports=[(broker, port)],
                                     use_ssl=False,
                                     vhost=vhost,
                                     reconnect_attempts_max=999)
        else:
            logging.info('setting up ssl cert/key authentication: %s' % broker)
            con = stomp.Connection12(host_and_ports=[(broker, port)],
                                     use_ssl=True,
                                     ssl_key_file=config_get('messaging-fts3', 'ssl_key_file'),
                                     ssl_cert_file=config_get('messaging-fts3', 'ssl_cert_file'),
                                     vhost=vhost,
                                     reconnect_attempts_max=999)
        conns.append(con)

    logging.info('receiver started')

    while not graceful_stop.is_set():

        heartbeat.live(executable, hostname, pid, hb_thread)

        for conn in conns:

            if not conn.is_connected():
                # Look the (host, port) pair up once per reconnect instead of three times.
                host_and_port = conn.transport._Transport__host_and_ports[0]
                logging.info('connecting to %s' % host_and_port[0])
                record_counter('daemons.messaging.fts3.reconnect.%s' % host_and_port[0].split('.')[0])

                conn.set_listener('rucio-messaging-fts3', Receiver(broker=host_and_port,
                                                                   id=id,
                                                                   total_threads=total_threads,
                                                                   full_mode=full_mode))
                conn.start()
                if not use_ssl:
                    conn.connect(username, password, wait=True)
                else:
                    conn.connect(wait=True)
                conn.subscribe(destination=config_get('messaging-fts3', 'destination'),
                               id='rucio-messaging-fts3',
                               ack='auto')

        time.sleep(1)

    logging.info('receiver graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except Exception as error:
            # Disconnecting stays best effort, but the failure is no longer hidden.
            logging.warning('could not disconnect from broker: %s', error)

    heartbeat.die(executable, hostname, pid, hb_thread)

    logging.info('receiver graceful stop done')
def list_rebalance_rule_candidates(rse_id, mode=None, session=None):
    """
    List the rebalance rule candidates based on the agreed on specification

    In ``decommission`` mode the work is delegated to
    ``_list_rebalance_rule_candidates_dump``. Otherwise the candidates are
    selected with a database query whose constraints can be tightened through
    options of the ``bb8`` configuration section.

    :param rse_id:       RSE of the source.
    :param mode:         Rebalancing mode.
    :param session:      DB Session.
    :returns:            In the non-decommission case, a list of rows (scope, name, rule id, rse_expression,
                         subscription_id, DID bytes, DID length, average file size of the dataset lock),
                         ordered by average file size and then by ``accessed_at``.
    """

    vo = get_rse_vo(rse_id=rse_id)

    # dumps can be applied only for decommission since the dumps don't contain info from dids
    if mode == 'decommission':
        return _list_rebalance_rule_candidates_dump(rse_id, mode)

    # If no decommissioning use SQLAlchemy

    # Rules constraints. By default only moves rules in state OK that have no children and have only one copy
    # Additional constraints can be imposed by setting specific configuration
    rule_clause = [
        models.ReplicationRule.state == RuleState.OK,
        models.ReplicationRule.child_rule_id.is_(None),
        models.ReplicationRule.copies == 1
    ]

    # Only move rules w/o expiration date, or rules with expiration_date > <min_expires_date_in_days> days
    expiration_clause = models.ReplicationRule.expires_at.is_(None)
    min_expires_date_in_days = config_get_int(section='bb8', option='min_expires_date_in_days',
                                              raise_exception=False, default=-1, expiration_time=3600)
    if min_expires_date_in_days > 0:
        min_expires_date = datetime.utcnow() + timedelta(days=min_expires_date_in_days)
        expiration_clause = or_(models.ReplicationRule.expires_at > min_expires_date,
                                models.ReplicationRule.expires_at.is_(None))
    rule_clause.append(expiration_clause)

    # Only move rules which were created more than <min_created_days> days ago
    min_created_days = config_get_int(section='bb8', option='min_created_days',
                                      raise_exception=False, default=-1, expiration_time=3600)
    if min_created_days > 0:
        # Use UTC like the expiration cut-off above instead of the host's local time.
        # NOTE(review): assumes created_at is stored in UTC like expires_at -- confirm against the models.
        created_before = datetime.utcnow() - timedelta(days=min_created_days)
        rule_clause.append(models.ReplicationRule.created_at < created_before)

    # Only move rules which are owned by <allowed_accounts> (comma separated accounts, e.g. panda,root,ddmadmin,jdoe)
    allowed_accounts = config_get(section='bb8', option='allowed_accounts',
                                  raise_exception=False, default=None, expiration_time=3600)
    if allowed_accounts:
        internal_accounts = [InternalAccount(acc.strip(' '), vo=vo) for acc in allowed_accounts.split(',')]
        rule_clause.append(models.ReplicationRule.account.in_(internal_accounts))

    # Only move rules that have a certain grouping <allowed_grouping> (accepted values : all, dataset, none)
    rule_grouping_mapping = {
        'all': RuleGrouping.ALL,
        'dataset': RuleGrouping.DATASET,
        'none': RuleGrouping.NONE
    }
    allowed_grouping = config_get(section='bb8', option='allowed_grouping',
                                  raise_exception=False, default=None, expiration_time=3600)
    if allowed_grouping:
        # An unrecognised value maps to None here, i.e. the comparison is made against None.
        rule_clause.append(models.ReplicationRule.grouping == rule_grouping_mapping.get(allowed_grouping))

    # DIDs constraints. By default only moves rules of DID where we can compute the size
    # Additional constraints can be imposed by setting specific configuration
    did_clause = [models.DataIdentifier.bytes.isnot(None)]

    type_to_did_type_mapping = {
        'all': [DIDType.CONTAINER, DIDType.DATASET, DIDType.FILE],
        'collection': [DIDType.CONTAINER, DIDType.DATASET],
        'container': [DIDType.CONTAINER],
        'dataset': [DIDType.DATASET],
        'file': [DIDType.FILE]
    }

    # Only allows to migrate rules of a certain did_type <allowed_did_type>
    # (accepted values : all, collection, container, dataset, file)
    allowed_did_type = config_get(section='bb8', option='allowed_did_type',
                                  raise_exception=False, default=None, expiration_time=3600)
    if allowed_did_type:
        did_type_filters = [models.DataIdentifier.did_type == did_type
                            for did_type in type_to_did_type_mapping.get(allowed_did_type)]
        # or_() takes the clauses as positional arguments, so the list has to be unpacked.
        did_clause.append(or_(*did_type_filters))

    # Only allows to migrate rules of closed DID if <only_move_closed_did> is set
    only_move_closed_did = config_get_bool(section='bb8', option='only_move_closed_did',
                                           raise_exception=False, default=None, expiration_time=3600)
    if only_move_closed_did:
        did_clause.append(models.DataIdentifier.is_open == False)  # NOQA

    # Now build the query
    # Number of dataset locks sharing scope, name and RSE with the candidate lock
    external_dsl = aliased(models.DatasetLock)
    count_locks = select([func.count()]).where(
        and_(external_dsl.scope == models.DatasetLock.scope,
             external_dsl.name == models.DatasetLock.name,
             external_dsl.rse_id == models.DatasetLock.rse_id)).as_scalar()

    # Average file size of the dataset lock (bytes / length); 0 when the length is missing or below 1.
    # Defined once because it is selected, filtered on and used for ordering.
    avg_file_size = case([(or_(models.DatasetLock.length < 1, models.DatasetLock.length.is_(None)), 0)],
                         else_=cast(models.DatasetLock.bytes / models.DatasetLock.length, BigInteger))

    query = session.query(models.DatasetLock.scope,
                          models.DatasetLock.name,
                          models.ReplicationRule.id,
                          models.ReplicationRule.rse_expression,
                          models.ReplicationRule.subscription_id,
                          models.DataIdentifier.bytes,
                          models.DataIdentifier.length,
                          avg_file_size).\
        join(models.ReplicationRule, models.ReplicationRule.id == models.DatasetLock.rule_id).\
        join(models.DataIdentifier, and_(models.DatasetLock.scope == models.DataIdentifier.scope,
                                         models.DatasetLock.name == models.DataIdentifier.name)).\
        filter(models.DatasetLock.rse_id == rse_id).\
        filter(and_(*rule_clause)).\
        filter(and_(*did_clause)).\
        filter(avg_file_size > 1000000000).\
        filter(count_locks == 1)

    summary = query.order_by(avg_file_size, models.DatasetLock.accessed_at).all()
    return summary
def receiver(id, total_threads=1, full_mode=False):
    """
    Main loop to consume messages from the FTS3 producer.

    Resolves the configured broker aliases through DNS ``A`` record lookups,
    creates one SSL STOMP connection per resolved address and keeps
    (re)connecting and subscribing them until ``graceful_stop`` is set.
    A heartbeat is sent on every iteration.

    :param id:            Forwarded to the ``Receiver`` listener.
    :param total_threads: Forwarded to the ``Receiver`` listener.
    :param full_mode:     Forwarded to the ``Receiver`` listener.
    :raises Exception:    If the broker list cannot be loaded from the configuration.
    """

    logging.info('receiver starting in full mode: %s' % full_mode)

    executable = ' '.join(sys.argv)
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()

    heartbeat.sanity_check(executable=executable, hostname=hostname)
    # Make an initial heartbeat so that all finishers have the correct worker number on the next try
    heartbeat.live(executable, hostname, pid, hb_thread)

    try:
        brokers_alias = [b.strip() for b in config_get('messaging-fts3', 'brokers').split(',')]
    except Exception:
        # A bare except here would turn KeyboardInterrupt/SystemExit into a generic Exception.
        raise Exception('Could not load brokers from configuration')

    logging.info('resolving broker dns alias: %s' % brokers_alias)

    # Collect the addresses of all aliases directly into one flat list.
    brokers_resolved = []
    for broker in brokers_alias:
        brokers_resolved.extend(str(tmp_broker) for tmp_broker in dns.resolver.query(broker, 'A'))

    logging.info('brokers resolved to %s', brokers_resolved)

    conns = []
    for broker in brokers_resolved:
        conns.append(stomp.Connection(host_and_ports=[(broker, config_get_int('messaging-fts3', 'port'))],
                                      use_ssl=True,
                                      ssl_key_file=config_get('messaging-fts3', 'ssl_key_file'),
                                      ssl_cert_file=config_get('messaging-fts3', 'ssl_cert_file'),
                                      ssl_version=ssl.PROTOCOL_TLSv1,
                                      reconnect_attempts_max=999))

    logging.info('receiver started')

    while not graceful_stop.is_set():

        heartbeat.live(executable, hostname, pid, hb_thread)

        for conn in conns:

            if not conn.is_connected():
                # Look the (host, port) pair up once per reconnect instead of three times.
                host_and_port = conn.transport._Transport__host_and_ports[0]
                logging.info('connecting to %s' % host_and_port[0])
                record_counter('daemons.messaging.fts3.reconnect.%s' % host_and_port[0].split('.')[0])

                conn.set_listener('rucio-messaging-fts3', Receiver(broker=host_and_port,
                                                                   id=id,
                                                                   total_threads=total_threads,
                                                                   full_mode=full_mode))
                conn.start()
                conn.connect()
                conn.subscribe(destination=config_get('messaging-fts3', 'destination'),
                               id='rucio-messaging-fts3',
                               ack='auto')

        time.sleep(1)

    logging.info('receiver graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except Exception as error:
            # Disconnecting stays best effort, but the failure is no longer hidden.
            logging.warning('could not disconnect from broker: %s', error)

    heartbeat.die(executable, hostname, pid, hb_thread)

    logging.info('receiver graceful stop done')
from rucio.common.config import config_get, config_get_bool, config_get_int from rucio.core import config as config_core from rucio.core.rse import get_rse_id, get_rse_transfer_limits queue_mode = config_get('conveyor', 'queue_mode', False, 'default') if queue_mode.upper() == 'STRICT': queue_mode = 'strict' config_memcache = config_get('conveyor', 'using_memcache', False, 'False') if config_memcache.upper() == 'TRUE': using_memcache = True else: using_memcache = False REGION_SHORT = make_region_memcached( expiration_time=config_get_int('conveyor', 'cache_time', False, 600)) def get_transfer_limits(activity, rse_id, logger=logging.log): """ Get RSE transfer limits. :param activity: The activity. :param rse_id: The RSE id. :param logger: Optional decorated logger that can be passed from the calling daemons or servers. :returns: max_transfers if exists else None. """ try: if queue_mode == 'strict': threshold = get_config_limit(activity, rse_id)