def __init__(self, datatypes, dest_rse_expr, max_bytes_hour, max_files_hour, max_bytes_hour_rse, max_files_hour_rse, min_popularity, min_recent_requests, max_replicas):
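        # Redis-backed caches and 1-hour rate counters for added bytes/files,
        # plus the per-hour and popularity thresholds passed in; RSE and site
        # attribute maps are built below from the destination RSE expression.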
        self._fsc = FreeSpaceCollector()
        self._nmc = NetworkMetricsCollector()
        self._added_cache = ExpiringDatasetCache(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), timeout=86400)
        self._dc = DatasetCache(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), timeout=86400)
        self._added_bytes = RedisTimeSeries(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), window=3600, prefix="added_bytes_")
        self._added_files = RedisTimeSeries(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), window=3600, prefix="added_files_")

        self._datatypes = datatypes.split(',')
        self._dest_rse_expr = dest_rse_expr
        self._max_bytes_hour = max_bytes_hour
        self._max_files_hour = max_files_hour
        self._max_bytes_hour_rse = max_bytes_hour_rse
        self._max_files_hour_rse = max_files_hour_rse
        self._min_popularity = min_popularity
        self._min_recent_requests = min_recent_requests
        self._max_replicas = max_replicas

        rses = parse_expression(self._dest_rse_expr)

        self._rses = {}
        self._sites = {}
        for rse in rses:
            rse_attrs = list_rse_attributes(rse['rse'])
            rse_attrs['rse'] = rse['rse']
            self._rses[rse['rse']] = rse_attrs
            self._sites[rse_attrs['site']] = rse_attrs

        self._dst_penalties = {}
        self._src_penalties = {}

        self._print_params()
Example No. 2
def consumer(id, num_thread=1):
    """
    Main loop to consume messages from the Rucio Cache producer.
    """

    logging.info('Rucio Cache consumer starting')

    brokers_alias = []
    brokers_resolved = []
    try:
        brokers_alias = [b.strip() for b in config_get('messaging-cache', 'brokers').split(',')]
    except:
        raise Exception('Could not load rucio cache brokers from configuration')

    logging.info('resolving rucio cache broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        brokers_resolved.append([str(tmp_broker) for tmp_broker in dns.resolver.query(broker, 'A')])
    brokers_resolved = [item for sublist in brokers_resolved for item in sublist]

    logging.debug('Rucio cache brokers resolved to %s', brokers_resolved)

    conns = {}
    for broker in brokers_resolved:
        conn = stomp.Connection(host_and_ports=[(broker, config_get_int('messaging-cache', 'port'))],
                                use_ssl=True,
                                ssl_key_file=config_get('messaging-cache', 'ssl_key_file'),
                                ssl_cert_file=config_get('messaging-cache', 'ssl_cert_file'),
                                ssl_version=ssl.PROTOCOL_TLSv1)
        conns[conn] = Consumer(conn.transport._Transport__host_and_ports[0], account=config_get('messaging-cache', 'account'), id=id, num_thread=num_thread)

    logging.info('consumer started')

    while not GRACEFUL_STOP.is_set():

        for conn in conns:

            if not conn.is_connected():
                logging.info('connecting to %s' % conn.transport._Transport__host_and_ports[0][0])
                record_counter('daemons.messaging.cache.reconnect.%s' % conn.transport._Transport__host_and_ports[0][0].split('.')[0])

                conn.set_listener('rucio-cache-messaging', conns[conn])
                conn.start()
                conn.connect()
                conn.subscribe(destination=config_get('messaging-cache', 'destination'),
                               id='rucio-cache-messaging',
                               ack='auto')

        time.sleep(1)

    logging.info('graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except:
            pass

    logging.info('graceful stop done')
Example No. 3
def consumer(id_, num_thread=1):
    """
    Main loop to consume messages from the Rucio Cache producer.
    """

    logging.info('Rucio Cache consumer starting')

    brokers_alias = []
    brokers_resolved = []
    try:
        brokers_alias = [b.strip() for b in config_get('messaging-cache', 'brokers').split(',')]
    except:
        raise Exception('Could not load rucio cache brokers from configuration')

    logging.info('resolving rucio cache broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        addrinfos = socket.getaddrinfo(broker, 0, socket.AF_INET, 0, socket.IPPROTO_TCP)
        brokers_resolved.extend(ai[4][0] for ai in addrinfos)

    logging.debug('Rucio cache brokers resolved to %s', brokers_resolved)

    conns = {}
    for broker in brokers_resolved:
        conn = stomp.Connection(host_and_ports=[(broker, config_get_int('messaging-cache', 'port'))],
                                use_ssl=True,
                                ssl_key_file=config_get('messaging-cache', 'ssl_key_file'),
                                ssl_cert_file=config_get('messaging-cache', 'ssl_cert_file'),
                                vhost=config_get('messaging-cache', 'broker_virtual_host', raise_exception=False)
                                )
        conns[conn] = Consumer(conn.transport._Transport__host_and_ports[0], account=config_get('messaging-cache', 'account'), id_=id_, num_thread=num_thread)

    logging.info('consumer started')

    while not GRACEFUL_STOP.is_set():

        for conn in conns:

            if not conn.is_connected():
                logging.info('connecting to %s' % conn.transport._Transport__host_and_ports[0][0])
                record_counter('daemons.messaging.cache.reconnect.%s' % conn.transport._Transport__host_and_ports[0][0].split('.')[0])

                conn.set_listener('rucio-cache-messaging', conns[conn])
                conn.connect()
                conn.subscribe(destination=config_get('messaging-cache', 'destination'),
                               id='rucio-cache-messaging',
                               ack='auto')

        time.sleep(1)

    logging.info('graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except:
            pass

    logging.info('graceful stop done')
Example No. 4
    def _retry_protocol_stat(self, protocol, pfn):
        """
        Try to stat the file; on failure, retry after 1s, 2s, 4s, 8s, 16s and 32s. Fails only if all attempts fail.
        :param protocol: The protocol to use to reach this file
        :param pfn: Physical file name of the target for the protocol stat
        """
        retries = config_get_int('client', 'protocol_stat_retries', raise_exception=False, default=6)
        for attempt in range(retries):
            try:
                self.logger.debug('stat: pfn=%s' % pfn)
                stats = protocol.stat(pfn)

                if int(stats['filesize']) == 0:
                    raise Exception('Filesize came back as 0. Potential storage race condition, need to retry.')

                return stats
            except RSEChecksumUnavailable as error:
                # The stat succeeded here, but the checksum failed
                raise error
            except Exception as error:
                self.logger.debug('stat: unexpected error=%s' % error)
                fail_str = ['The requested service is not available at the moment', 'Permission refused']
                if any(x in str(error) for x in fail_str):
                    raise error
                self.logger.debug('stat: unknown edge case, retrying in %ss' % 2**attempt)
                time.sleep(2**attempt)
        return protocol.stat(pfn)
Example No. 5
def kronos_dataset(thread=0, dataset_queue=None, sleep_time=60):
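    # Flush queued dataset traces every 'dataset_wait' seconds while keeping a
    # heartbeat alive; sleeps to honour the configured cycle time.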
    logging.info('kronos-dataset[%d/?] starting', thread)

    executable = 'kronos-dataset'
    hostname = socket.gethostname()
    pid = getpid()
    hb_thread = current_thread()

    dataset_wait = config_get_int('tracer-kronos', 'dataset_wait')
    start = datetime.now()
    sanity_check(executable=executable, hostname=hostname)
    while not graceful_stop.is_set():
        start_time = time()
        heart_beat = live(executable, hostname, pid, hb_thread)
        prepend_str = 'kronos-dataset[%i/%i] ' % (heart_beat['assign_thread'],
                                                  heart_beat['nr_threads'])
        logger = formatted_logger(logging.log, prepend_str + '%s')
        if (datetime.now() - start).seconds > dataset_wait:
            __update_datasets(dataset_queue, logger=logger)
            start = datetime.now()

        tottime = time() - start_time
        if tottime < sleep_time:
            logger(logging.INFO,
                   'Will sleep for %s seconds' % (sleep_time - tottime))
            sleep(sleep_time - tottime)

    die(executable=executable, hostname=hostname, pid=pid, thread=thread)

    # once again for the backlog
    logger(logging.INFO, 'cleaning dataset backlog before shutdown...')
    __update_datasets(dataset_queue)
Example No. 6
def kronos_dataset(once=False, thread=0, dataset_queue=None, sleep_time=60):
    logging.info('(kronos_dataset) starting')

    hostname = socket.gethostname()
    pid = getpid()
    thread = current_thread()

    dataset_wait = config_get_int('tracer-kronos', 'dataset_wait')
    start = datetime.now()
    sanity_check(executable='kronos-dataset', hostname=hostname)
    while not graceful_stop.is_set():
        start_time = time()
        live(executable='kronos-dataset',
             hostname=hostname,
             pid=pid,
             thread=thread)
        if (datetime.now() - start).seconds > dataset_wait:
            __update_datasets(dataset_queue)
            start = datetime.now()
        tottime = time() - start_time
        if tottime < sleep_time:
            logging.info('(kronos_dataset) Will sleep for %s seconds' %
                         (sleep_time - tottime))
            sleep(sleep_time - tottime)
    # once again for the backlog
    die(executable='kronos-dataset', hostname=hostname, pid=pid, thread=thread)
    logging.info(
        '(kronos_dataset) cleaning dataset backlog before shutdown...')
    __update_datasets(dataset_queue)
Example No. 7
def run(total_workers=1, once=False, inputfile=None):
    """
    Starts up the automatix threads.
    """
    try:
        sites = [
            s.strip() for s in config_get('automatix', 'sites').split(',')
        ]
    except Exception:
        raise Exception('Could not load sites from configuration')
    if not inputfile:
        inputfile = '/opt/rucio/etc/automatix.json'
    try:
        sleep_time = config_get_int('automatix', 'sleep_time')
    except Exception:
        sleep_time = 3600
    try:
        account = config_get('automatix', 'account')
    except Exception:
        account = 'root'
    try:
        dataset_lifetime = config_get_int('automatix', 'dataset_lifetime')
    except Exception:
        dataset_lifetime = None
    try:
        set_metadata = config_get_bool('automatix', 'set_metadata')
    except Exception:
        set_metadata = False

    threads = list()
    for worker_number in range(0, total_workers):
        kwargs = {
            'worker_number': worker_number + 1,
            'total_workers': total_workers,
            'once': once,
            'sites': sites,
            'sleep_time': sleep_time,
            'account': account,
            'inputfile': inputfile,
            'set_metadata': set_metadata,
            'dataset_lifetime': dataset_lifetime
        }
        threads.append(threading.Thread(target=automatix, kwargs=kwargs))
    [thread.start() for thread in threads]
    while threads[0].is_alive():
        logging.debug('Still %i active threads' % len(threads))
        [thread.join(timeout=3.14) for thread in threads]
Example No. 8
def consumer(id, total_threads=1):
    """
    Main loop to consume messages from the FTS3 producer.
    """

    logging.info('consumer starting')

    brokers_alias = []
    brokers_resolved = []
    try:
        brokers_alias = [b.strip() for b in config_get('messaging-fts3', 'brokers').split(',')]
    except:
        raise Exception('Could not load brokers from configuration')

    logging.info('resolving broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        brokers_resolved.append([str(tmp_broker) for tmp_broker in dns.resolver.query(broker, 'A')])
    brokers_resolved = [item for sublist in brokers_resolved for item in sublist]

    logging.debug('brokers resolved to %s', brokers_resolved)

    conns = []
    for broker in brokers_resolved:
        conns.append(stomp.Connection(host_and_ports=[(broker, config_get_int('messaging-fts3', 'port'))],
                                      use_ssl=True,
                                      ssl_key_file=config_get('messaging-fts3', 'ssl_key_file'),
                                      ssl_cert_file=config_get('messaging-fts3', 'ssl_cert_file'),
                                      ssl_version=ssl.PROTOCOL_TLSv1))

    logging.info('consumer started')

    while not graceful_stop.is_set():

        for conn in conns:

            if not conn.is_connected():
                logging.info('connecting to %s' % conn.transport._Transport__host_and_ports[0][0])
                record_counter('daemons.messaging.fts3.reconnect.%s' % conn.transport._Transport__host_and_ports[0][0].split('.')[0])

                conn.set_listener('rucio-messaging-fts3', Consumer(broker=conn.transport._Transport__host_and_ports[0], id=id, total_threads=total_threads))
                conn.start()
                conn.connect()
                conn.subscribe(destination=config_get('messaging-fts3', 'destination'),
                               id='rucio-messaging-fts3',
                               ack='auto')

        time.sleep(1)

    logging.info('graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except:
            pass

    logging.info('graceful stop done')
Example No. 9
        def __init__(self, delete_keys=False):
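            # Per-site job counters (average / current / max) backed by a Redis
            # time series; workload data is fetched from the configured PanDA URL.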
            self._avg_jobs = {}
            self._cur_jobs = {}
            self._max_jobs = {}
            self._tms = RedisTimeSeries(
                config_get('c3po', 'redis_host'),
                config_get_int('c3po', 'redis_port'),
                config_get_int('c3po-workload', 'window'), 'jobs_')

            self._request_headers = {
                "Accept": "application/json",
                "Content-Type": "application/json"
            }
            self._request_url = config_get('c3po-workload', 'panda_url')
            if delete_keys:
                self._tms.delete_keys()
            self.reload_cache()
Example No. 10
def kronos_dataset(once=False, process=0, total_processes=1, thread=0, total_threads=1, dataset_queue=None):
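    # Minimal variant: flush the dataset queue every 'dataset_wait' seconds
    # until a graceful stop is requested, then drain the backlog.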
    dataset_wait = config_get_int('tracer-kronos', 'dataset_wait')
    start = datetime.now()
    while not graceful_stop.is_set():
        if (datetime.now() - start).seconds > dataset_wait:
            __update_datasets(dataset_queue)
            start = datetime.now()
        sleep(10)
    # once again for the backlog
    logging.info('cleaning dataset backlog before shutdown...')
    __update_datasets(dataset_queue)
Example No. 11
    def __init__(self):
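        # Collect free-space information, cache dataset info in Redis, and build
        # the list of RSEs matching the Tier-2 DATADISK expression.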
        self._fsc = FreeSpaceCollector()
        self._dc = DatasetCache(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), timeout=86400)

        rse_expr = "tier=2&type=DATADISK"
        rse_attrs = parse_expression(rse_expr)

        self._rses = []
        for rse in rse_attrs:
            self._rses.append(rse['rse'])

        self.__setup_penalties()
Example No. 12
def bulk_submit_xfer(submitjob, recursive=False, logger=logging.log):
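    # Submit one bulk transfer task to Globus: authenticate with a refresh token,
    # add every file in 'submitjob', and enforce a deadline derived from the
    # 'globus_task_deadline' setting (default 2880 minutes).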
    cfg = load_config(logger=logger)
    client_id = cfg['globus']['apps'][GLOBUS_AUTH_APP]['client_id']
    auth_client = NativeAppAuthClient(client_id)
    refresh_token = cfg['globus']['apps'][GLOBUS_AUTH_APP]['refresh_token']
    source_endpoint_id = submitjob[0].get('metadata').get(
        'source_globus_endpoint_id')
    destination_endpoint_id = submitjob[0].get('metadata').get(
        'dest_globus_endpoint_id')
    authorizer = RefreshTokenAuthorizer(refresh_token=refresh_token,
                                        auth_client=auth_client)
    tc = TransferClient(authorizer=authorizer)

    # make job_label for task a timestamp
    now = datetime.datetime.now()
    job_label = now.strftime('%Y%m%d%H%M%s')

    # retrieve globus_task_deadline value to enforce time window to complete transfers
    # default is 2880 minutes or 48 hours
    globus_task_deadline = config_get_int('conveyor', 'globus_task_deadline',
                                          False, 2880)
    deadline = now + datetime.timedelta(minutes=globus_task_deadline)

    # From Globus: sync_level="checksum" means that before files are transferred, Globus computes checksums on the source
    # and destination files, and only files whose checksums differ are transferred. verify_checksum=True means that after
    # a file is transferred, Globus computes checksums on the source and destination files to verify that the file was
    # transferred correctly. If the checksums do not match, it redoes the transfer of that file.
    tdata = TransferData(tc,
                         source_endpoint_id,
                         destination_endpoint_id,
                         label=job_label,
                         sync_level="checksum",
                         deadline=str(deadline))

    for file in submitjob:
        source_path = file.get('sources')[0]
        dest_path = file.get('destinations')[0]
        filesize = file['metadata']['filesize']
        # TODO: support passing a recursive parameter to Globus
        # md5 = file['metadata']['md5']
        # tdata.add_item(source_path, dest_path, recursive=False, external_checksum=md5)
        tdata.add_item(source_path, dest_path, recursive=False)
        record_counter(
            'daemons.conveyor.transfer_submitter.globus.transfers.submit.filesize',
            filesize)

    # logging.info('submitting transfer...')
    transfer_result = tc.submit_transfer(tdata)
    logger(logging.INFO, "transfer_result: %s" % transfer_result)

    return transfer_result["task_id"]
Example No. 13
def run(total_workers=1, once=False, inputfile=None):
    """
    Starts up the automatix threads.
    """
    try:
        sites = [s.strip() for s in config_get('automatix', 'sites').split(',')]
    except:
        raise Exception('Could not load sites from configuration')
    if not inputfile:
        inputfile = '/opt/rucio/etc/automatix.json'
    try:
        sleep_time = config_get_int('automatix', 'sleep_time')
    except:
        sleep_time = 3600
    try:
        account = config_get('automatix', 'account')
    except:
        account = 'root'
    try:
        dataset_lifetime = config_get_int('automatix', 'dataset_lifetime')
    except:
        dataset_lifetime = None
    threads = list()
    for worker_number in xrange(0, total_workers):
        kwargs = {'worker_number': worker_number + 1,
                  'total_workers': total_workers,
                  'once': once,
                  'sites': sites,
                  'sleep_time': sleep_time,
                  'account': account,
                  'inputfile': inputfile,
                  'dataset_lifetime': dataset_lifetime}
        threads.append(threading.Thread(target=automatix, kwargs=kwargs))
    [t.start() for t in threads]
    while threads[0].is_alive():
        logging.debug('Still %i active threads' % len(threads))
        [t.join(timeout=3.14) for t in threads]
Example No. 14
 def _retry_protocol_stat(self, protocol, pfn):
     """
     Try to stat the file; on failure, retry after 1s, 2s, 4s, 8s, 16s and 32s. Fails only if all attempts fail.
     :param protocol: The protocol to use to reach this file
     :param pfn: Physical file name of the target for the protocol stat
     """
     retries = config_get_int('client', 'protocol_stat_retries', raise_exception=False, default=6)
     for attempt in range(retries):
         try:
             stats = protocol.stat(pfn)
             return stats
         except RSEChecksumUnavailable as error:
             # The stat succeeded here, but the checksum failed
             raise error
         except Exception as error:
             time.sleep(2**attempt)
     return protocol.stat(pfn)
Example No. 15
def __geoip_db():
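    # Keep a local copy of the GeoIP database under /tmp and re-download it when
    # missing or older than 'geoip_expire_delay' days (default 30).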
    db_path = Path(f'/tmp/{GEOIP_DB_EDITION}.mmdb')
    db_expire_delay = timedelta(days=config_get_int(
        'core', 'geoip_expire_delay', raise_exception=False, default=30))

    must_download = False
    if not db_path.is_file():
        print('%s does not exist. Downloading it.' % db_path)
        must_download = True
    elif db_expire_delay and datetime.fromtimestamp(
            db_path.stat().st_mtime) < datetime.now() - db_expire_delay:
        print('%s is too old. Re-downloading it.' % db_path)
        must_download = True

    if must_download:
        __download_geoip_db(destination=db_path)

    return geoip2.database.Reader(str(db_path))
Example No. 16
def kronos_dataset(once=False, thread=0, dataset_queue=None):
    logging.info('(kronos_dataset) starting')

    hostname = gethostname()
    pid = getpid()
    thread = current_thread()

    dataset_wait = config_get_int('tracer-kronos', 'dataset_wait')
    start = datetime.now()
    while not graceful_stop.is_set():
        live(executable='kronos-dataset', hostname=hostname, pid=pid, thread=thread)
        if (datetime.now() - start).seconds > dataset_wait:
            __update_datasets(dataset_queue)
            start = datetime.now()
        sleep(10)
    # once again for the backlog
    die(executable='kronos-dataset', hostname=hostname, pid=pid, thread=thread)
    logging.info('(kronos_dataset) cleaning dataset backlog before shutdown...')
    __update_datasets(dataset_queue)
Example No. 17
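# Logging configuration for the nongrid trace writer, read from the
# [nongrid-trace] section with safe defaults.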
CONFIG_TRACE_LOGLEVEL = getattr(
    logging,
    config_get('nongrid-trace',
               'loglevel',
               raise_exception=False,
               default='DEBUG').upper())
CONFIG_TRACE_LOGFORMAT = config_get('nongrid-trace',
                                    'logformat',
                                    raise_exception=False,
                                    default='%(message)s')
CONFIG_TRACE_TRACEDIR = config_get('nongrid-trace',
                                   'tracedir',
                                   raise_exception=False,
                                   default='/var/log/rucio')
CONFIG_TRACE_MAXBYTES = config_get_int('nongrid-trace',
                                       'maxbytes',
                                       raise_exception=False,
                                       default=1000000000)
CONFIG_TRACE_BACKUPCOUNT = config_get_int('nongrid-trace',
                                          'backupCount',
                                          raise_exception=False,
                                          default=10)

# reset root logger handlers. Otherwise everything from ROTATING_LOGGER will also end up in the apache logs.
logging.getLogger().handlers = []

LOGGER = logging.getLogger('nongrid_trace')
LOGGER.setLevel(CONFIG_COMMON_LOGLEVEL)

ROTATING_LOGGER = logging.getLogger('nongrid_trace_buffer')
ROTATING_LOGGER.setLevel(CONFIG_TRACE_LOGLEVEL)
Example No. 18
def consumer(id, num_thread=1):
    """
    Main loop to consume messages from the Rucio Cache producer.
    """

    logging.info('Rucio Cache consumer starting')

    brokers_alias = []
    brokers_resolved = []
    try:
        brokers_alias = [b.strip() for b in config_get('messaging-cache', 'brokers').split(',')]
    except:
        raise Exception('Could not load rucio cache brokers from configuration')

    logging.info('resolving rucio cache broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        brokers_resolved.append([str(tmp_broker) for tmp_broker in dns.resolver.query(broker, 'A')])
    brokers_resolved = [item for sublist in brokers_resolved for item in sublist]

    logging.debug('Rucio cache brokers resolved to %s', brokers_resolved)

    conns = {}
    for broker in brokers_resolved:
        conn = stomp.Connection(host_and_ports=[(broker, config_get_int('messaging-cache', 'port'))],
                                use_ssl=True,
                                ssl_key_file=config_get('messaging-cache', 'ssl_key_file'),
                                ssl_cert_file=config_get('messaging-cache', 'ssl_cert_file'),
                                ssl_version=ssl.PROTOCOL_TLSv1)
        conns[conn] = Consumer(conn.transport._Transport__host_and_ports[0], account=config_get('messaging-cache', 'account'), id=id, num_thread=num_thread)

    logging.info('consumer started')

    while not graceful_stop.is_set():

        for conn in conns:

            if not conn.is_connected():
                logging.info('connecting to %s' % conn.transport._Transport__host_and_ports[0][0])
                record_counter('daemons.messaging.cache.reconnect.%s' % conn.transport._Transport__host_and_ports[0][0].split('.')[0])

                conn.set_listener('rucio-cache-messaging', conns[conn])
                conn.start()
                conn.connect()
                conn.subscribe(destination=config_get('messaging-cache', 'destination'),
                               id='rucio-cache-messaging',
                               ack='auto',
                               headers={'selector': 'vo = \'%s\'' % config_get('messaging-cache', 'voname')})

        time.sleep(1)

    logging.info('graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except:
            pass

    logging.info('graceful stop done')
Example No. 19
def kronos_file(thread=0, dataset_queue=None, sleep_time=60):
    """
    Main loop to consume tracer reports.
    """

    logging.info('kronos_file[%i/?] starting', thread)

    executable = 'kronos-file'
    hostname = socket.gethostname()
    pid = getpid()
    hb_thread = current_thread()

    chunksize = config_get_int('tracer-kronos', 'chunksize')
    prefetch_size = config_get_int('tracer-kronos', 'prefetch_size')
    subscription_id = config_get('tracer-kronos', 'subscription_id')
    try:
        bad_files_patterns = []
        pattern = get(section='kronos',
                      option='bad_files_patterns',
                      session=None)
        pattern = str(pattern)
        patterns = pattern.split(",")
        for pat in patterns:
            bad_files_patterns.append(re.compile(pat.strip()))
    except ConfigNotFound:
        bad_files_patterns = []
    except Exception as error:
        logging.log(logging.ERROR,
                    'kronos_file[%i/?] Failed to get bad_file_patterns %s',
                    thread, str(error))
        bad_files_patterns = []

    use_ssl = True
    try:
        use_ssl = config_get_bool('tracer-kronos', 'use_ssl')
    except Exception:
        pass

    if not use_ssl:
        username = config_get('tracer-kronos', 'username')
        password = config_get('tracer-kronos', 'password')

    excluded_usrdns = set(
        config_get('tracer-kronos', 'excluded_usrdns').split(','))
    vhost = config_get('tracer-kronos',
                       'broker_virtual_host',
                       raise_exception=False)

    brokers_alias = [
        b.strip() for b in config_get('tracer-kronos', 'brokers').split(',')
    ]
    port = config_get_int('tracer-kronos', 'port')
    reconnect_attempts = config_get_int('tracer-kronos', 'reconnect_attempts')
    ssl_key_file = config_get('tracer-kronos',
                              'ssl_key_file',
                              raise_exception=False)
    ssl_cert_file = config_get('tracer-kronos',
                               'ssl_cert_file',
                               raise_exception=False)

    sanity_check(executable=executable, hostname=hostname)
    while not graceful_stop.is_set():
        start_time = time()
        heart_beat = live(executable, hostname, pid, hb_thread)
        prepend_str = 'kronos-file[%i/%i] ' % (heart_beat['assign_thread'],
                                               heart_beat['nr_threads'])
        logger = formatted_logger(logging.log, prepend_str + '%s')
        conns = __get_broker_conns(brokers=brokers_alias,
                                   port=port,
                                   use_ssl=use_ssl,
                                   vhost=vhost,
                                   reconnect_attempts=reconnect_attempts,
                                   ssl_key_file=ssl_key_file,
                                   ssl_cert_file=ssl_cert_file,
                                   timeout=sleep_time,
                                   logger=logger)
        for conn in conns:
            if not conn.is_connected():
                logger(
                    logging.INFO, 'connecting to %s' %
                    str(conn.transport._Transport__host_and_ports[0]))
                record_counter('daemons.tracer.kronos.reconnect.%s' %
                               conn.transport._Transport__host_and_ports[0][0])
                conn.set_listener(
                    'rucio-tracer-kronos',
                    AMQConsumer(
                        broker=conn.transport._Transport__host_and_ports[0],
                        conn=conn,
                        queue=config_get('tracer-kronos', 'queue'),
                        chunksize=chunksize,
                        subscription_id=subscription_id,
                        excluded_usrdns=excluded_usrdns,
                        dataset_queue=dataset_queue,
                        bad_files_patterns=bad_files_patterns,
                        logger=logger))
                if not use_ssl:
                    conn.connect(username, password)
                else:
                    conn.connect()
                conn.subscribe(
                    destination=config_get('tracer-kronos', 'queue'),
                    ack='client-individual',
                    id=subscription_id,
                    headers={'activemq.prefetchSize': prefetch_size})

        tottime = time() - start_time
        if tottime < sleep_time:
            logger(logging.INFO,
                   'Will sleep for %s seconds' % (sleep_time - tottime))
            sleep(sleep_time - tottime)

    logger(logging.INFO, 'graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except Exception:
            pass

    die(executable=executable, hostname=hostname, pid=pid, thread=thread)
    logger(logging.INFO, 'graceful stop done')
Example No. 20
def request_transfer(once=False, src=None, dst=None):
    """
    Main loop to request a new transfer.
    """

    logging.info('request: starting')

    site_a = 'RSE%s' % generate_uuid().upper()
    site_b = 'RSE%s' % generate_uuid().upper()

    scheme = 'https'
    impl = 'rucio.rse.protocols.webdav.Default'
    if not src.startswith('https://'):
        scheme = 'srm'
        impl = 'rucio.rse.protocols.srm.Default'
        srctoken = src.split(':')[0]
        dsttoken = dst.split(':')[0]

    tmp_proto = {
        'impl': impl,
        'scheme': scheme,
        'domains': {
            'lan': {'read': 1, 'write': 1, 'delete': 1},
            'wan': {'read': 1, 'write': 1, 'delete': 1}}}

    rse.add_rse(site_a)
    tmp_proto['hostname'] = src.split(':')[1][2:]
    tmp_proto['port'] = src.split(':')[2].split('/')[0]
    tmp_proto['prefix'] = '/'.join([''] + src.split(':')[2].split('/')[1:])
    if scheme == 'srm':
        tmp_proto['extended_attributes'] = {'space_token': srctoken,
                                            'web_service_path': ''}
    rse.add_protocol(site_a, tmp_proto)

    tmp_proto = {
        'impl': impl,
        'scheme': scheme,
        'domains': {
            'lan': {'read': 1, 'write': 1, 'delete': 1},
            'wan': {'read': 1, 'write': 1, 'delete': 1}}}

    rse.add_rse(site_b)
    tmp_proto['hostname'] = dst.split(':')[1][2:]
    tmp_proto['port'] = dst.split(':')[2].split('/')[0]
    tmp_proto['prefix'] = '/'.join([''] + dst.split(':')[2].split('/')[1:])
    if scheme == 'srm':
        tmp_proto['extended_attributes'] = {'space_token': dsttoken,
                                            'web_service_path': ''}
    rse.add_protocol(site_b, tmp_proto)

    si = rsemanager.get_rse_info(site_a)

    session = get_session()

    logging.info('request: started')

    while not graceful_stop.is_set():

        try:

            ts = time.time()

            tmp_name = generate_uuid()

            # add a new dataset
            did.add_did(scope='mock', name='dataset-%s' % tmp_name,
                        type=DIDType.DATASET, account='root', session=session)

            # construct PFN
            pfn = rsemanager.lfns2pfns(si, lfns=[{'scope': 'mock', 'name': 'file-%s' % tmp_name}])['mock:file-%s' % tmp_name]

            # create the directories if needed
            p = rsemanager.create_protocol(si, operation='write', scheme=scheme)
            p.connect()
            try:
                p.mkdir(pfn)
            except:
                pass

            # upload the test file
            try:
                fp = os.path.dirname(config_get('injector', 'file'))
                fn = os.path.basename(config_get('injector', 'file'))
                p.put(fn, pfn, source_dir=fp)
            except:
                logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                did.delete_dids([{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}], account='root', session=session)
                break

            # add the replica
            replica.add_replica(rse=site_a, scope='mock', name='file-%s' % tmp_name,
                                bytes=config_get_int('injector', 'bytes'),
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account='root', session=session)

            # to the dataset
            did.attach_dids(scope='mock', name='dataset-%s' % tmp_name, dids=[{'scope': 'mock',
                                                                               'name': 'file-%s' % tmp_name,
                                                                               'bytes': config_get('injector', 'bytes')}],
                            account='root', session=session)

            # add rule for the dataset
            ts = time.time()

            rule.add_rule(dids=[{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}],
                          account='root',
                          copies=1,
                          rse_expression=site_b,
                          grouping='ALL',
                          weight=None,
                          lifetime=None,
                          locked=False,
                          subscription_id=None,
                          activity='mock-injector',
                          session=session)

            logging.info('added rule for %s for DID mock:%s' % (site_b, tmp_name))
            record_timer('daemons.mock.conveyorinjector.add_rule', (time.time()-ts)*1000)

            record_counter('daemons.mock.conveyorinjector.request_transfer')

            session.commit()
        except:
            session.rollback()
            logging.critical(traceback.format_exc())

        if once:
            return

    logging.info('request: graceful stop requested')

    logging.info('request: graceful stop done')
Example No. 21
def submitter(once=False,
              rses=None,
              partition_wait_time=10,
              bulk=100,
              group_bulk=1,
              group_policy='rule',
              source_strategy=None,
              activities=None,
              sleep_time=600,
              max_sources=4,
              archive_timeout_override=None,
              filter_transfertool=FILTER_TRANSFERTOOL,
              transfertool=TRANSFER_TOOL,
              transfertype=TRANSFER_TYPE,
              ignore_availability=False):
    """
    Main loop to submit a new transfer primitive to a transfertool.
    """

    try:
        partition_hash_var = config_get('conveyor', 'partition_hash_var')
    except NoOptionError:
        partition_hash_var = None
    try:
        scheme = config_get('conveyor', 'scheme')
    except NoOptionError:
        scheme = None
    try:
        failover_scheme = config_get('conveyor', 'failover_scheme')
    except NoOptionError:
        failover_scheme = None
    try:
        timeout = config_get('conveyor', 'submit_timeout')
        timeout = float(timeout)
    except NoOptionError:
        timeout = None

    try:
        bring_online = config_get_int('conveyor', 'bring_online')
    except NoOptionError:
        bring_online = 43200

    try:
        max_time_in_queue = {}
        timelife_conf = config_get('conveyor', 'max_time_in_queue')
        timelife_confs = timelife_conf.split(",")
        for conf in timelife_confs:
            act, timelife = conf.split(":")
            max_time_in_queue[act.strip()] = int(timelife.strip())
    except NoOptionError:
        max_time_in_queue = {}

    if 'default' not in max_time_in_queue:
        max_time_in_queue['default'] = 168
    logging.debug("Maximum time in queue for different activities: %s",
                  max_time_in_queue)

    logger_prefix = executable = "conveyor-submitter"
    if activities:
        activities.sort()
        executable += ' --activities ' + str(activities)
    if filter_transfertool:
        executable += ' --filter-transfertool ' + filter_transfertool
    if rses:
        rse_ids = [rse['id'] for rse in rses]
    else:
        rse_ids = None

    transfertools = transfertool.split(',')
    transfertool_kwargs = {
        FTS3Transfertool: {
            'group_policy': group_policy,
            'group_bulk': group_bulk,
            'source_strategy': source_strategy,
            'max_time_in_queue': max_time_in_queue,
            'bring_online': bring_online,
            'default_lifetime': 172800,
            'archive_timeout_override': archive_timeout_override,
        },
        GlobusTransferTool: {
            'group_policy': transfertype,
            'group_bulk': group_bulk,
        },
    }

    run_daemon(
        once=once,
        graceful_stop=graceful_stop,
        executable=executable,
        logger_prefix=logger_prefix,
        partition_wait_time=partition_wait_time,
        sleep_time=sleep_time,
        run_once_fnc=functools.partial(
            run_once,
            bulk=bulk,
            group_bulk=group_bulk,
            filter_transfertool=filter_transfertool,
            transfertools=transfertools,
            ignore_availability=ignore_availability,
            scheme=scheme,
            failover_scheme=failover_scheme,
            partition_hash_var=partition_hash_var,
            rse_ids=rse_ids,
            timeout=timeout,
            transfertool_kwargs=transfertool_kwargs,
        ),
        activities=activities,
    )
Example No. 22
def kronos_file(once=False,
                thread=0,
                brokers_resolved=None,
                dataset_queue=None):
    """
    Main loop to consume tracer reports.
    """

    logging.info('tracer consumer starting')

    hostname = gethostname()
    pid = getpid()
    thread = current_thread()

    chunksize = config_get_int('tracer-kronos', 'chunksize')
    prefetch_size = config_get_int('tracer-kronos', 'prefetch_size')
    subscription_id = config_get('tracer-kronos', 'subscription_id')

    use_ssl = True
    try:
        use_ssl = config_get_bool('tracer-kronos', 'use_ssl')
    except:
        pass

    if not use_ssl:
        username = config_get('tracer-kronos', 'username')
        password = config_get('tracer-kronos', 'password')

    excluded_usrdns = set(
        config_get('tracer-kronos', 'excluded_usrdns').split(','))

    conns = []
    for broker in brokers_resolved:
        if not use_ssl:
            conns.append(
                Connection(host_and_ports=[
                    (broker, config_get_int('tracer-kronos', 'port'))
                ],
                           use_ssl=False,
                           reconnect_attempts_max=config_get_int(
                               'tracer-kronos', 'reconnect_attempts')))
        else:
            conns.append(
                Connection(host_and_ports=[
                    (broker, config_get_int('tracer-kronos', 'port'))
                ],
                           use_ssl=True,
                           ssl_key_file=config_get('tracer-kronos',
                                                   'ssl_key_file'),
                           ssl_cert_file=config_get('tracer-kronos',
                                                    'ssl_cert_file'),
                           ssl_version=PROTOCOL_TLSv1,
                           reconnect_attempts_max=config_get_int(
                               'tracer-kronos', 'reconnect_attempts')))

    logging.info('(kronos_file) tracer consumer started')

    sanity_check(executable='kronos-file', hostname=hostname)
    while not graceful_stop.is_set():
        live(executable='kronos-file',
             hostname=hostname,
             pid=pid,
             thread=thread)
        for conn in conns:
            if not conn.is_connected():
                logging.info('(kronos_file) connecting to %s' %
                             conn.transport._Transport__host_and_ports[0][0])
                record_counter('daemons.tracer.kronos.reconnect.%s' %
                               conn.transport._Transport__host_and_ports[0]
                               [0].split('.')[0])
                conn.set_listener(
                    'rucio-tracer-kronos',
                    AMQConsumer(
                        broker=conn.transport._Transport__host_and_ports[0],
                        conn=conn,
                        queue=config_get('tracer-kronos', 'queue'),
                        chunksize=chunksize,
                        subscription_id=subscription_id,
                        excluded_usrdns=excluded_usrdns,
                        dataset_queue=dataset_queue))
                conn.start()
                if not use_ssl:
                    conn.connect(username, password)
                else:
                    conn.connect()
                conn.subscribe(
                    destination=config_get('tracer-kronos', 'queue'),
                    ack='client-individual',
                    id=subscription_id,
                    headers={'activemq.prefetchSize': prefetch_size})
        sleep(1)

    logging.info('(kronos_file) graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except:
            pass

    die(executable='kronos-file', hostname=hostname, pid=pid, thread=thread)
    logging.info('(kronos_file) graceful stop done')
Example No. 23
def kronos_file(once=False, process=0, total_processes=1, thread=0, total_threads=1, brokers_resolved=None, dataset_queue=None):
    """
    Main loop to consume tracer reports.
    """

    logging.info('tracer consumer starting')

    chunksize = config_get_int('tracer-kronos', 'chunksize')
    prefetch_size = config_get_int('tracer-kronos', 'prefetch_size')
    subscription_id = config_get('tracer-kronos', 'subscription_id')

    use_ssl = True
    try:
        use_ssl = config_get_bool('tracer-kronos', 'use_ssl')
    except:
        pass

    if not use_ssl:
        username = config_get('tracer-kronos', 'username')
        password = config_get('tracer-kronos', 'password')

    excluded_usrdns = set(config_get('tracer-kronos', 'excluded_usrdns').split(','))

    conns = []
    for broker in brokers_resolved:
        if not use_ssl:
            conns.append(Connection(host_and_ports=[(broker, config_get_int('tracer-kronos', 'port'))],
                                    use_ssl=False,
                                    reconnect_attempts_max=config_get_int('tracer-kronos', 'reconnect_attempts')))
        else:
            conns.append(Connection(host_and_ports=[(broker, config_get_int('tracer-kronos', 'port'))],
                                    use_ssl=True,
                                    ssl_key_file=config_get('tracer-kronos', 'ssl_key_file'),
                                    ssl_cert_file=config_get('tracer-kronos', 'ssl_cert_file'),
                                    ssl_version=PROTOCOL_TLSv1,
                                    reconnect_attempts_max=config_get_int('tracer-kronos', 'reconnect_attempts')))

    logging.info('(kronos_file) tracer consumer started')

    while not graceful_stop.is_set():
        for conn in conns:
            if not conn.is_connected():
                logging.info('(kronos_file) connecting to %s' % conn.transport._Transport__host_and_ports[0][0])
                record_counter('daemons.tracer.kronos.reconnect.%s' % conn.transport._Transport__host_and_ports[0][0].split('.')[0])
                conn.set_listener('rucio-tracer-kronos', AMQConsumer(broker=conn.transport._Transport__host_and_ports[0], conn=conn, chunksize=chunksize, subscription_id=subscription_id, excluded_usrdns=excluded_usrdns, dataset_queue=dataset_queue))
                conn.start()
                if not use_ssl:
                    conn.connect(username, password)
                else:
                    conn.connect()
                conn.subscribe(destination=config_get('tracer-kronos', 'queue'), ack='client-individual', id=subscription_id, headers={'activemq.prefetchSize': prefetch_size})
        sleep(1)

    logging.info('(kronos_file) graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except:
            pass

    logging.info('(kronos_file) graceful stop done')
Example No. 24
logger.setLevel(logging.INFO)

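# Rotating trace log: up to 1 GB per file with 10 backups, written to 'tracedir'.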
handler = logging.handlers.RotatingFileHandler(filename='%s/trace' % config_get('trace', 'tracedir'), maxBytes=1000000000, backupCount=10)

logFormatter = logging.Formatter('%(message)s')
handler.setFormatter(logFormatter)
handler.suffix = "%Y-%m-%d"
logger.addHandler(handler)

brokers_alias = []
brokers_resolved = []
try:
    brokers_alias = [b.strip() for b in config_get('trace', 'brokers').split(',')]
except:
    raise Exception('Could not load brokers from configuration')
port = config_get_int('trace', 'port')
topic = config_get('trace', 'topic')
username = config_get('trace', 'username')
password = config_get('trace', 'password')

logging.getLogger("stomp").setLevel(logging.CRITICAL)

brokers_resolved = []
for broker in brokers_alias:
    brokers_resolved.append([str(tmp_broker) for tmp_broker in dns.resolver.query(broker, 'A')])
brokers_resolved = [item for sublist in brokers_resolved for item in sublist]

conns = []

for broker in brokers_resolved:
    conns.append(stomp.Connection(host_and_ports=[(broker, port)], reconnect_attempts_max=3))
Example No. 25
 def __init__(self):
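     # Open the Redis instance holding network metrics and remember the key prefix.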
     self._r = StrictRedis(host=config_get('c3po-network-metrics',
                                           'redis_host'),
                           port=config_get_int('c3po-network-metrics',
                                               'redis_port'))
     self._prefix = config_get('c3po-network-metrics', 'prefix')
Example No. 26
def deliver_messages(once=False,
                     brokers_resolved=None,
                     thread=0,
                     bulk=1000,
                     delay=10,
                     broker_timeout=3,
                     broker_retry=3):
    '''
    Main loop to deliver messages to a broker.
    '''
    logging.info('[broker] starting - threads (%i) bulk (%i)', thread, bulk)

    if not brokers_resolved:
        logging.fatal('No brokers resolved.')
        return

    if not broker_timeout:  # Allow zero in config
        broker_timeout = None

    logging.info('[broker] checking authentication method')
    use_ssl = True
    try:
        use_ssl = config_get_bool('messaging-hermes', 'use_ssl')
    except:
        logging.info(
            '[broker] could not find use_ssl in configuration -- please update your rucio.cfg'
        )

    port = config_get_int('messaging-hermes', 'port')
    vhost = config_get('messaging-hermes',
                       'broker_virtual_host',
                       raise_exception=False)
    if not use_ssl:
        username = config_get('messaging-hermes', 'username')
        password = config_get('messaging-hermes', 'password')
        port = config_get_int('messaging-hermes', 'nonssl_port')

    conns = []
    for broker in brokers_resolved:
        if not use_ssl:
            logging.info(
                '[broker] setting up username/password authentication: %s' %
                broker)
            con = stomp.Connection12(host_and_ports=[(broker, port)],
                                     vhost=vhost,
                                     keepalive=True,
                                     timeout=broker_timeout)
        else:
            logging.info(
                '[broker] setting up ssl cert/key authentication: %s' % broker)
            con = stomp.Connection12(
                host_and_ports=[(broker, port)],
                use_ssl=True,
                ssl_key_file=config_get('messaging-hermes', 'ssl_key_file'),
                ssl_cert_file=config_get('messaging-hermes', 'ssl_cert_file'),
                vhost=vhost,
                keepalive=True,
                timeout=broker_timeout)

        con.set_listener(
            'rucio-hermes',
            HermesListener(con.transport._Transport__host_and_ports[0]))

        conns.append(con)
    destination = config_get('messaging-hermes', 'destination')

    executable = 'hermes [broker]'
    hostname = socket.getfqdn()
    pid = os.getpid()
    heartbeat_thread = threading.current_thread()

    # Make an initial heartbeat so that all daemons have the correct worker number on the next try
    sanity_check(executable=executable,
                 hostname=hostname,
                 pid=pid,
                 thread=heartbeat_thread)
    GRACEFUL_STOP.wait(1)

    while not GRACEFUL_STOP.is_set():
        try:
            t_start = time.time()

            heartbeat = live(executable=executable,
                             hostname=hostname,
                             pid=pid,
                             thread=heartbeat_thread)

            logging.debug('[broker] %i:%i - using: %s',
                          heartbeat['assign_thread'], heartbeat['nr_threads'],
                          [
                              conn.transport._Transport__host_and_ports[0][0]
                              for conn in conns
                          ])

            messages = retrieve_messages(bulk=bulk,
                                         thread=heartbeat['assign_thread'],
                                         total_threads=heartbeat['nr_threads'])

            if messages:

                logging.debug('[broker] %i:%i - retrieved %i messages',
                              heartbeat['assign_thread'],
                              heartbeat['nr_threads'], len(messages))
                to_delete = []
                for message in messages:
                    try:
                        conn = random.sample(conns, 1)[0]
                        if not conn.is_connected():
                            host_and_ports = conn.transport._Transport__host_and_ports[
                                0][0]
                            record_counter('daemons.hermes.reconnect.%s' %
                                           host_and_ports.split('.')[0])
                            conn.start()
                            if not use_ssl:
                                logging.info(
                                    '[broker] %i:%i - connecting with USERPASS to %s',
                                    heartbeat['assign_thread'],
                                    heartbeat['nr_threads'], host_and_ports)
                                conn.connect(username, password, wait=True)
                            else:
                                logging.info(
                                    '[broker] %i:%i - connecting with SSL to %s',
                                    heartbeat['assign_thread'],
                                    heartbeat['nr_threads'], host_and_ports)
                                conn.connect(wait=True)

                        conn.send(body=json.dumps({
                            'event_type':
                            str(message['event_type']).lower(),
                            'payload':
                            message['payload'],
                            'created_at':
                            str(message['created_at'])
                        }),
                                  destination=destination,
                                  headers={
                                      'persistent':
                                      'true',
                                      'event_type':
                                      str(message['event_type']).lower()
                                  })

                        to_delete.append({
                            'id':
                            message['id'],
                            'created_at':
                            message['created_at'],
                            'updated_at':
                            message['created_at'],
                            'payload':
                            json.dumps(message['payload']),
                            'event_type':
                            message['event_type']
                        })
                    except ValueError:
                        logging.warn('Cannot serialize payload to JSON: %s',
                                     str(message['payload']))
                        to_delete.append({
                            'id': message['id'],
                            'created_at': message['created_at'],
                            'updated_at': message['created_at'],
                            'payload': str(message['payload']),
                            'event_type': message['event_type']
                        })
                        continue
                    except stomp.exception.NotConnectedException as error:
                        logging.warn(
                            'Could not deliver message due to NotConnectedException: %s',
                            str(error))
                        continue
                    except stomp.exception.ConnectFailedException as error:
                        logging.warn(
                            'Could not deliver message due to ConnectFailedException: %s',
                            str(error))
                        continue
                    except Exception as error:
                        logging.warn('Could not deliver message: %s',
                                     str(error))
                        logging.critical(traceback.format_exc())
                        continue

                    if str(message['event_type']).lower().startswith(
                            'transfer') or str(message['event_type']).lower(
                            ).startswith('stagein'):
                        logging.debug(
                            '[broker] %i:%i - event_type: %s, scope: %s, name: %s, rse: %s, request-id: %s, transfer-id: %s, created_at: %s',
                            heartbeat['assign_thread'],
                            heartbeat['nr_threads'],
                            str(message['event_type']).lower(),
                            message['payload'].get('scope', None),
                            message['payload'].get('name', None),
                            message['payload'].get('dst-rse', None),
                            message['payload'].get('request-id', None),
                            message['payload'].get('transfer-id', None),
                            str(message['created_at']))

                    elif str(message['event_type']).lower().startswith(
                            'dataset'):
                        logging.debug(
                            '[broker] %i:%i - event_type: %s, scope: %s, name: %s, rse: %s, rule-id: %s, created_at: %s)',
                            heartbeat['assign_thread'],
                            heartbeat['nr_threads'],
                            str(message['event_type']).lower(),
                            message['payload']['scope'],
                            message['payload']['name'],
                            message['payload']['rse'],
                            message['payload']['rule_id'],
                            str(message['created_at']))

                    elif str(message['event_type']).lower().startswith(
                            'deletion'):
                        if 'url' not in message['payload']:
                            message['payload']['url'] = 'unknown'
                        logging.debug(
                            '[broker] %i:%i - event_type: %s, scope: %s, name: %s, rse: %s, url: %s, created_at: %s',
                            heartbeat['assign_thread'],
                            heartbeat['nr_threads'],
                            str(message['event_type']).lower(),
                            message['payload']['scope'],
                            message['payload']['name'],
                            message['payload']['rse'],
                            message['payload']['url'],
                            str(message['created_at']))
                    else:
                        logging.debug('[broker] %i:%i - other message: %s',
                                      heartbeat['assign_thread'],
                                      heartbeat['nr_threads'], message)

                delete_messages(to_delete)
                logging.info('[broker] %i:%i - submitted %i messages',
                             heartbeat['assign_thread'],
                             heartbeat['nr_threads'], len(to_delete))

                if once:
                    break

        except NoResultFound:
            # silence this error: https://its.cern.ch/jira/browse/RUCIO-1699
            pass
        except:
            logging.critical(traceback.format_exc())

        t_delay = delay - (time.time() - t_start)
        t_delay = t_delay if t_delay > 0 else 0
        if t_delay:
            logging.debug('[broker] %i:%i - sleeping %s seconds',
                          heartbeat['assign_thread'], heartbeat['nr_threads'],
                          t_delay)
        time.sleep(t_delay)

    for conn in conns:
        try:
            conn.disconnect()
        except Exception:
            pass

    logging.debug('[broker] %i:%i - graceful stop requested',
                  heartbeat['assign_thread'], heartbeat['nr_threads'])

    die(executable, hostname, pid, heartbeat_thread)

    logging.debug('[broker] %i:%i - graceful stop done',
                  heartbeat['assign_thread'], heartbeat['nr_threads'])
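
A minimal sketch of the pacing used in the loop above, under assumed names: each pass targets a fixed cadence of delay seconds, so the daemon only sleeps for whatever time is left after the work itself.

# Hedged sketch (illustrative names, not from the source example).
import time

def run_paced(work, delay=10):
    t_start = time.time()
    work()
    # Sleep only for the remainder of the cadence window, never a negative time.
    t_delay = max(0.0, delay - (time.time() - t_start))
    if t_delay:
        time.sleep(t_delay)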
Example #27
def request_transfer(loop=1,
                     src=None,
                     dst=None,
                     upload=False,
                     same_src=False,
                     same_dst=False):
    """
    Main loop to request a new transfer.
    """

    logging.info('request: starting')

    session = get_session()
    src_rse = generate_rse(
        src, ''.join(random.sample(string.ascii_letters.upper(), 8)))
    dst_rse = generate_rse(
        dst, ''.join(random.sample(string.ascii_letters.upper(), 8)))

    logging.info('request: started')

    i = 0
    while not graceful_stop.is_set():

        if i >= loop:
            return

        try:

            if not same_src:
                src_rse = generate_rse(
                    src, ''.join(random.sample(string.ascii_letters.upper(),
                                               8)))

            if not same_dst:
                dst_rse = generate_rse(
                    dst, ''.join(random.sample(string.ascii_letters.upper(),
                                               8)))

            tmp_name = generate_uuid()

            # add a new dataset
            scope = InternalScope('mock')
            account = InternalAccount('root')
            did.add_did(scope=scope,
                        name='dataset-%s' % tmp_name,
                        type=DIDType.DATASET,
                        account=account,
                        session=session)

            # construct PFN
            pfn = rsemanager.lfns2pfns(src_rse,
                                       lfns=[{
                                           'scope': scope.external,
                                           'name': 'file-%s' % tmp_name
                                       }])['%s:file-%s' %
                                           (scope.external, tmp_name)]

            if upload:
                # create the directories if needed
                p = rsemanager.create_protocol(src_rse,
                                               operation='write',
                                               scheme='srm')
                p.connect()
                try:
                    p.mkdir(pfn)
                except:
                    pass

                # upload the test file
                try:
                    fp = os.path.dirname(config_get('injector', 'file'))
                    fn = os.path.basename(config_get('injector', 'file'))
                    p.put(fn, pfn, source_dir=fp)
                except:
                    logging.critical(
                        'Could not upload, removing temporary DID: %s' %
                        str(sys.exc_info()))
                    did.delete_dids([{
                        'scope': scope,
                        'name': 'dataset-%s' % tmp_name
                    }],
                                    account=account,
                                    session=session)
                    break

            # add the replica
            replica.add_replica(rse_id=src_rse['id'],
                                scope=scope,
                                name='file-%s' % tmp_name,
                                bytes=config_get_int('injector', 'bytes'),
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account=account,
                                session=session)
            logging.info('added replica on %s for DID mock:%s' %
                         (src_rse['rse'], tmp_name))

            # to the dataset
            did.attach_dids(scope=scope,
                            name='dataset-%s' % tmp_name,
                            dids=[{
                                'scope': scope,
                                'name': 'file-%s' % tmp_name,
                                'bytes': config_get_int('injector', 'bytes')
                            }],
                            account=account,
                            session=session)

            # add rule for the dataset
            rule.add_rule(dids=[{
                'scope': scope,
                'name': 'dataset-%s' % tmp_name
            }],
                          account=account,
                          copies=1,
                          rse_expression=dst_rse['rse'],
                          grouping='ALL',
                          weight=None,
                          lifetime=None,
                          locked=False,
                          subscription_id=None,
                          activity='mock-injector',
                          session=session)
            logging.info('added rule for %s for DID %s:%s' %
                         (dst_rse['rse'], scope, tmp_name))

            session.commit()
        except:
            session.rollback()
            logging.critical(traceback.format_exc())

        i += 1

    logging.info('request: graceful stop requested')

    logging.info('request: graceful stop done')
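
A hedged sketch of the [injector] options that the example above reads through config_get and config_get_int; the option names follow from those calls, while the values below are placeholders rather than a real deployment.

# Hedged sketch: expected [injector] keys with placeholder values.
from configparser import ConfigParser

cfg = ConfigParser()
cfg.read_string("""
[injector]
# local test file uploaded to the source RSE
file = /tmp/rucio_injector_testfile
# size and checksums registered for the replica
bytes = 1048576
adler32 = 0a1b2c3d
md5 = 0123456789abcdef0123456789abcdef
""")
print(dict(cfg['injector']))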
Example #28
def receiver(id_, total_threads=1, full_mode=False, all_vos=False):
    """
    Main loop to consume messages from the FTS3 producer.
    """

    logging.info('receiver starting in full mode: %s' % full_mode)

    logger_prefix = executable = 'conveyor-receiver'

    brokers_alias = []
    brokers_resolved = []
    try:
        brokers_alias = [
            b.strip()
            for b in config_get('messaging-fts3', 'brokers').split(',')
        ]
    except Exception:
        raise Exception('Could not load brokers from configuration')

    logging.info('resolving broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        addrinfos = socket.getaddrinfo(broker, 0, socket.AF_INET, 0,
                                       socket.IPPROTO_TCP)
        brokers_resolved.extend(ai[4][0] for ai in addrinfos)

    logging.info('brokers resolved to %s', brokers_resolved)

    logging.info('checking authentication method')
    use_ssl = True
    try:
        use_ssl = config_get_bool('messaging-fts3', 'use_ssl')
    except:
        logging.info(
            'could not find use_ssl in configuration -- please update your rucio.cfg'
        )

    port = config_get_int('messaging-fts3', 'port')
    vhost = config_get('messaging-fts3',
                       'broker_virtual_host',
                       raise_exception=False)
    if not use_ssl:
        username = config_get('messaging-fts3', 'username')
        password = config_get('messaging-fts3', 'password')
        port = config_get_int('messaging-fts3', 'nonssl_port')

    conns = []
    for broker in brokers_resolved:
        if not use_ssl:
            logging.info('setting up username/password authentication: %s' %
                         broker)
            con = stomp.Connection12(host_and_ports=[(broker, port)],
                                     use_ssl=False,
                                     vhost=vhost,
                                     reconnect_attempts_max=999)
        else:
            logging.info('setting up ssl cert/key authentication: %s' % broker)
            con = stomp.Connection12(
                host_and_ports=[(broker, port)],
                use_ssl=True,
                ssl_key_file=config_get('messaging-fts3', 'ssl_key_file'),
                ssl_cert_file=config_get('messaging-fts3', 'ssl_cert_file'),
                vhost=vhost,
                reconnect_attempts_max=999)
        conns.append(con)

    logging.info('receiver started')

    with HeartbeatHandler(executable=executable,
                          renewal_interval=30,
                          logger_prefix=logger_prefix) as heartbeat_handler:

        while not graceful_stop.is_set():

            _, _, logger = heartbeat_handler.live()

            for conn in conns:

                if not conn.is_connected():
                    logger(
                        logging.INFO, 'connecting to %s' %
                        conn.transport._Transport__host_and_ports[0][0])
                    record_counter(
                        'daemons.messaging.fts3.reconnect.{host}',
                        labels={
                            'host':
                            conn.transport._Transport__host_and_ports[0]
                            [0].split('.')[0]
                        })

                    conn.set_listener(
                        'rucio-messaging-fts3',
                        Receiver(broker=conn.transport.
                                 _Transport__host_and_ports[0],
                                 id_=id_,
                                 total_threads=total_threads,
                                 full_mode=full_mode,
                                 all_vos=all_vos))
                    if not use_ssl:
                        conn.connect(username, password, wait=True)
                    else:
                        conn.connect(wait=True)
                    conn.subscribe(destination=config_get(
                        'messaging-fts3', 'destination'),
                                   id='rucio-messaging-fts3',
                                   ack='auto')
            time.sleep(1)

        for conn in conns:
            try:
                conn.disconnect()
            except Exception:
                pass
Example #29
def setup_activemq(logger):
    """
    Deliver messages to ActiveMQ

    :param logger:             The logger object.
    """

    logger(logging.INFO, '[broker] Resolving brokers')

    brokers_alias = []
    brokers_resolved = []
    try:
        brokers_alias = [
            broker.strip()
            for broker in config_get('messaging-hermes', 'brokers').split(',')
        ]
    except:
        raise Exception('Could not load brokers from configuration')

    logger(logging.INFO, '[broker] Resolving broker dns alias: %s',
           brokers_alias)
    brokers_resolved = []
    for broker in brokers_alias:
        try:
            addrinfos = socket.getaddrinfo(broker, 0, socket.AF_INET, 0,
                                           socket.IPPROTO_TCP)
            brokers_resolved.extend(ai[4][0] for ai in addrinfos)
        except socket.gaierror as ex:
            logger(logging.ERROR,
                   '[broker] Cannot resolve domain name %s (%s)', broker,
                   str(ex))

    logger(logging.DEBUG, '[broker] Brokers resolved to %s', brokers_resolved)

    if not brokers_resolved:
        logger(logging.FATAL, '[broker] No brokers resolved.')
        return None, None, None, None, None

    # Read the broker timeout from the configuration (assumed option name), defaulting to 3 seconds
    broker_timeout = config_get_int('messaging-hermes', 'broker_timeout', False, 3)
    if not broker_timeout:  # Allow zero in config
        broker_timeout = None

    logger(logging.INFO, '[broker] Checking authentication method')
    use_ssl = True
    try:
        use_ssl = config_get_bool('messaging-hermes', 'use_ssl')
    except:
        logger(
            logging.INFO,
            '[broker] Could not find use_ssl in configuration -- please update your rucio.cfg'
        )

    port = config_get_int('messaging-hermes', 'port')
    vhost = config_get('messaging-hermes',
                       'broker_virtual_host',
                       raise_exception=False)
    username = None
    password = None
    if not use_ssl:
        username = config_get('messaging-hermes', 'username')
        password = config_get('messaging-hermes', 'password')
        port = config_get_int('messaging-hermes', 'nonssl_port')

    conns = []
    for broker in brokers_resolved:
        if not use_ssl:
            logger(logging.INFO,
                   '[broker] setting up username/password authentication: %s',
                   broker)
            con = stomp.Connection12(host_and_ports=[(broker, port)],
                                     vhost=vhost,
                                     keepalive=True,
                                     timeout=broker_timeout)
        else:
            logger(logging.INFO,
                   '[broker] setting up ssl cert/key authentication: %s',
                   broker)
            con = stomp.Connection12(
                host_and_ports=[(broker, port)],
                use_ssl=True,
                ssl_key_file=config_get('messaging-hermes', 'ssl_key_file'),
                ssl_cert_file=config_get('messaging-hermes', 'ssl_cert_file'),
                vhost=vhost,
                keepalive=True,
                timeout=broker_timeout)

        con.set_listener(
            'rucio-hermes',
            HermesListener(con.transport._Transport__host_and_ports[0]))

        conns.append(con)
    destination = config_get('messaging-hermes', 'destination')
    return conns, destination, username, password, use_ssl
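
A hedged usage sketch, not taken from the source: how a caller with a logger callable could use the tuple returned by setup_activemq() to (re)connect, following the same connect pattern used by the other daemons in this collection.

# Hedged sketch (assumed caller code).
conns, destination, username, password, use_ssl = setup_activemq(logger)
if conns:
    for conn in conns:
        if not conn.is_connected():
            if use_ssl:
                conn.connect(wait=True)
            else:
                conn.connect(username, password, wait=True)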
Example #30
def consumer(id_, num_thread=1):
    """
    Main loop to consume messages from the Rucio Cache producer.
    """

    prepend_str = 'cache-consumer '
    logger = formatted_logger(logging.log, prepend_str + '%s')

    logger(logging.INFO, 'Rucio Cache consumer starting')

    try:
        brokers_alias = [
            b.strip()
            for b in config_get('messaging-cache', 'brokers').split(',')
        ]
    except:
        raise Exception(
            'Could not load rucio cache brokers from configuration')

    use_ssl = True
    try:
        use_ssl = config_get_bool('messaging-cache', 'use_ssl')
    except Exception:
        pass

    if not use_ssl:
        username = config_get('messaging-cache', 'username')
        password = config_get('messaging-cache', 'password')
    destination = config_get('messaging-cache', 'destination')
    subscription_id = 'rucio-cache-messaging'

    vhost = config_get('messaging-cache',
                       'broker_virtual_host',
                       raise_exception=False)
    port = config_get_int('messaging-cache', 'port')
    reconnect_attempts = config_get_int('messaging-cache',
                                        'reconnect_attempts',
                                        default=100)
    ssl_key_file = config_get('messaging-cache',
                              'ssl_key_file',
                              raise_exception=False)
    ssl_cert_file = config_get('messaging-cache',
                               'ssl_cert_file',
                               raise_exception=False)

    conns = get_stomp_brokers(brokers=brokers_alias,
                              port=port,
                              use_ssl=use_ssl,
                              vhost=vhost,
                              reconnect_attempts=reconnect_attempts,
                              ssl_key_file=ssl_key_file,
                              ssl_cert_file=ssl_cert_file,
                              timeout=None,
                              logger=logger)

    logger(logging.INFO, 'consumer started')

    while not GRACEFUL_STOP.is_set():
        for conn in conns:
            if not conn.is_connected():
                host_port = conn.transport._Transport__host_and_ports[0]

                logger(logging.INFO, 'connecting to %s' % host_port[0])
                record_counter('daemons.messaging.cache.reconnect.{host}',
                               labels={'host': host_port[0]})
                conn.set_listener(
                    'rucio-cache-consumer',
                    AMQConsumer(broker=host_port, conn=conn, logger=logger))
                if not use_ssl:
                    conn.connect(username, password)
                else:
                    conn.connect()

                conn.subscribe(destination=destination,
                               ack='auto',
                               id=subscription_id)
        time.sleep(1)

    logger(logging.INFO, 'graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except:
            pass

    logger(logging.INFO, 'graceful stop done')
Example #31
def __assign_paths_to_transfertool_and_create_hops(
    candidate_paths_by_request_id: "Dict[str, List[DirectTransferDefinition]]",
    transfertool_classes: "Optional[List[Type[Transfertool]]]" = None,
    logger: "Callable" = logging.log,
    session: "Optional[Session]" = None,
) -> "Tuple[Dict[TransferToolBuilder, List[DirectTransferDefinition]], Set[str]]":
    """
    for each request, pick the first path which can be submitted by one of the transfertools.
    If the chosen path is multihop, create all missing intermediate requests and replicas.
    """
    reqs_no_host = set()
    paths_by_transfertool_builder = {}
    default_tombstone_delay = config_get_int(
        'transfers',
        'multihop_tombstone_delay',
        default=transfer_core.DEFAULT_MULTIHOP_TOMBSTONE_DELAY,
        expiration_time=600)
    for request_id, candidate_paths in candidate_paths_by_request_id.items():
        # Get the rws object from any candidate path. It is the same for all candidate paths. For multihop, the initial request is the last hop
        rws = candidate_paths[0][-1].rws

        # Selects the first path which can be submitted using a chain of supported transfertools
        # and for which the creation of intermediate hops (if it is a multihop) works correctly
        best_path = None
        builder_to_use = None
        hops_to_submit = []
        must_skip_submission = False

        tt_assignments = [(transfer_path,
                           __assign_to_transfertool(transfer_path,
                                                    transfertool_classes,
                                                    logger=logger))
                          for transfer_path in candidate_paths]
        # Prioritize the paths which need less transfertool transitions.
        # Ideally, the entire path should be submitted to a single transfertool
        for transfer_path, tt_assignment in sorted(tt_assignments,
                                                   key=lambda t: len(t[1])):
            if not tt_assignment:
                logger(
                    logging.INFO,
                    '%s: None of the transfertools can submit the request: %s',
                    request_id, [c.__name__ for c in transfertool_classes])
                continue

            # Set the 'transfertool' field on the intermediate hops which should be created in the database
            for sub_path, tt_builder in tt_assignment:
                if tt_builder:
                    for hop in sub_path:
                        if hop is not transfer_path[-1]:
                            hop.rws.transfertool = tt_builder.transfertool_class.external_name
            created, must_skip_submission = __create_missing_replicas_and_requests(
                transfer_path,
                default_tombstone_delay,
                logger=logger,
                session=session)
            if created:
                best_path = transfer_path
                # Only the first sub-path will be submitted to the corresponding transfertool,
                # the rest of the hops will wait for first hops to be transferred
                hops_to_submit, builder_to_use = tt_assignment[0]
            if created or must_skip_submission:
                break

        if not best_path:
            reqs_no_host.add(request_id)
            logger(
                logging.INFO,
                '%s: Cannot pick transfertool, or create intermediate requests'
                % request_id)
            continue

        transfer_core.ensure_db_sources(best_path,
                                        logger=logger,
                                        session=session)

        if len(best_path) > 1:
            logger(
                logging.INFO, '%s: Best path is multihop: %s' %
                (rws.request_id, transfer_core.transfer_path_str(best_path)))
        elif best_path is not candidate_paths[0] or len(
                best_path[0].sources) > 1:
            # Only print singlehop if it brings additional information:
            # - either it's not the first candidate path
            # - or it's a multi-source
            # in other cases, it doesn't bring any additional information to what is known from previous logs
            logger(
                logging.INFO, '%s: Best path is direct: %s' %
                (rws.request_id, transfer_core.transfer_path_str(best_path)))

        if must_skip_submission:
            logger(
                logging.INFO,
                '%s: Part of the transfer is already being handled. Skip for now.'
                % request_id)
            continue

        if len(hops_to_submit) < len(best_path):
            logger(logging.INFO,
                   '%s: Only first %d hops will be submitted by %s',
                   request_id, len(hops_to_submit), builder_to_use)

        paths_by_transfertool_builder.setdefault(builder_to_use,
                                                 []).append(hops_to_submit)
    return paths_by_transfertool_builder, reqs_no_host
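
A toy illustration of the ordering applied above: candidate paths are tried in ascending number of transfertool transitions, so a path that a single transfertool can submit end to end is preferred over one that needs a handover between tools. The tuples below are stand-ins for (transfer_path, tt_assignment), not real transfer objects.

# Hedged sketch with placeholder data.
tt_assignments = [
    ('path_a', [('hop1', 'fts3'), ('hop2', 'globus')]),  # needs two transfertools
    ('path_b', [('hop1+hop2', 'fts3')]),                 # a single transfertool suffices
]
for transfer_path, tt_assignment in sorted(tt_assignments, key=lambda t: len(t[1])):
    print(transfer_path)  # 'path_b' is considered first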
Example #32
def deliver_messages(once=False, brokers_resolved=None, process=0, total_processes=1, thread=0, total_threads=1, bulk=1000):
    """
    Main loop to deliver messages to a broker.
    """

    logging.info('hermes starting - process (%i/%i) thread (%i/%i) bulk (%i)' % (process, total_processes,
                                                                                 thread, total_threads,
                                                                                 bulk))
    conns = []
    for broker in brokers_resolved:
        conns.append(stomp.Connection(host_and_ports=[(broker, config_get_int('messaging-hermes', 'port'))],
                                      use_ssl=True,
                                      ssl_key_file=config_get('messaging-hermes', 'ssl_key_file'),
                                      ssl_cert_file=config_get('messaging-hermes', 'ssl_cert_file'),
                                      ssl_version=ssl.PROTOCOL_TLSv1))

    logging.info('hermes started - process (%i/%i) thread (%i/%i) bulk (%i)' % (process, total_processes,
                                                                                thread, total_threads,
                                                                                bulk))

    while not graceful_stop.is_set():

        try:
            for conn in conns:

                if not conn.is_connected():
                    logging.info('connecting to %s' % conn.transport._Transport__host_and_ports[0][0])
                    record_counter('daemons.hermes.reconnect.%s' % conn.transport._Transport__host_and_ports[0][0].split('.')[0])

                    conn.start()
                    conn.connect()

            tmp = retrieve_messages(bulk=bulk,
                                    process=process,
                                    total_processes=total_processes,
                                    thread=thread,
                                    total_threads=total_threads)
            if tmp == []:
                time.sleep(1)
            else:
                to_delete = []
                for t in tmp:
                    try:
                        random.sample(conns, 1)[0].send(body=json.dumps({'event_type': str(t['event_type']).lower(),
                                                                         'payload': t['payload'],
                                                                         'created_at': str(t['created_at'])}),
                                                        destination=config_get('messaging-hermes', 'destination'))
                    except ValueError:
                        logging.warn('Cannot serialize payload to JSON: %s' % str(t['payload']))
                        continue
                    except Exception as error:
                        logging.warn('Could not deliver message: %s' % str(error))
                        continue

                    to_delete.append(t['id'])

                    if str(t['event_type']).lower().startswith("transfer"):
                        logging.debug('%i:%i - event_type: %s, scope: %s, name: %s, rse: %s, request-id: %s, transfer-id: %s, created_at: %s' % (process,
                                                                                                                                                 thread,
                                                                                                                                                 str(t['event_type']).lower(),
                                                                                                                                                 t['payload']['scope'],
                                                                                                                                                 t['payload']['name'],
                                                                                                                                                 t['payload']['dst-rse'],
                                                                                                                                                 t['payload']['request-id'],
                                                                                                                                                 t['payload']['transfer-id'],
                                                                                                                                                 str(t['created_at'])))
                    elif str(t['event_type']).lower().startswith("dataset"):
                        logging.debug('%i:%i - event_type: %s, scope: %s, name: %s, rse: %s, rule-id: %s, created_at: %s' % (process,
                                                                                                                              thread,
                                                                                                                              str(t['event_type']).lower(),
                                                                                                                              t['payload']['scope'],
                                                                                                                              t['payload']['name'],
                                                                                                                              t['payload']['rse'],
                                                                                                                              t['payload']['rule_id'],
                                                                                                                              str(t['created_at'])))
                    elif str(t['event_type']).lower().startswith("deletion"):
                        if 'url' not in t['payload']:
                            t['payload']['url'] = 'unknown'
                        logging.debug('%i:%i - event_type: %s, scope: %s, name: %s, rse: %s, url: %s, created_at: %s' % (process,
                                                                                                                          thread,
                                                                                                                          str(t['event_type']).lower(),
                                                                                                                          t['payload']['scope'],
                                                                                                                          t['payload']['name'],
                                                                                                                          t['payload']['rse'],
                                                                                                                          t['payload']['url'],
                                                                                                                          str(t['created_at'])))

                    else:
                        logging.debug('%i:%i - other message: %s' % (process, thread, t))

                delete_messages(to_delete)
        except:
            logging.critical(traceback.format_exc())

    logging.debug('%i:%i - graceful stop requested' % (process, thread))

    for conn in conns:
        try:
            conn.disconnect()
        except:
            pass

    logging.debug('%i:%i - graceful stop done' % (process, thread))
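
A hedged sketch of the message body built above before it is handed to the broker: a JSON document carrying event_type, payload and created_at. The sample values are invented for illustration.

# Hedged sketch with placeholder values.
import json
from datetime import datetime

message = {'event_type': 'transfer-done',
           'payload': {'scope': 'mock', 'name': 'file-123', 'dst-rse': 'SITE_DATADISK',
                       'request-id': 'abc', 'transfer-id': 'def'},
           'created_at': datetime.utcnow()}
body = json.dumps({'event_type': str(message['event_type']).lower(),
                   'payload': message['payload'],
                   'created_at': str(message['created_at'])})
print(body)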
Example #33
    HANDLER.setFormatter(LOGFORMATTER)
    HANDLER.suffix = "%Y-%m-%d"
    LOGGER.addHandler(HANDLER)
except:
    if 'sphinx' not in sys.modules:
        raise

BROKERS_ALIAS, BROKERS_RESOLVED = [], []
try:
    BROKERS_ALIAS = [b.strip() for b in config_get('nongrid-trace', 'brokers').split(',')]
except:
    if 'sphinx' not in sys.modules:
        raise Exception('Could not load brokers from configuration')

try:
    PORT = config_get_int('nongrid-trace', 'port')
    TOPIC = config_get('nongrid-trace', 'topic')
    USERNAME = config_get('nongrid-trace', 'username')
    PASSWORD = config_get('nongrid-trace', 'password')
except:
    if 'sphinx' not in sys.modules:
        raise

logging.getLogger("stomp").setLevel(logging.CRITICAL)

for broker in BROKERS_ALIAS:
    try:
        BROKERS_RESOLVED.append([str(tmp_broker) for tmp_broker in dns.resolver.query(broker, 'A')])
        BROKERS_RESOLVED = [item for sublist in BROKERS_RESOLVED for item in sublist]
    except:
        pass
Example #34
def stager(once=False,
           rses=None,
           bulk=100,
           group_bulk=1,
           group_policy='rule',
           source_strategy=None,
           activities=None,
           sleep_time=600):
    """
    Main loop to submit a new transfer primitive to a transfertool.
    """

    try:
        scheme = config_get('conveyor', 'scheme')
    except NoOptionError:
        scheme = None

    try:
        failover_scheme = config_get('conveyor', 'failover_scheme')
    except NoOptionError:
        failover_scheme = None

    try:
        bring_online = config_get_int('conveyor', 'bring_online')
    except NoOptionError:
        bring_online = 43200

    try:
        max_time_in_queue = {}
        timelife_conf = config_get('conveyor', 'max_time_in_queue')
        timelife_confs = timelife_conf.split(",")
        for conf in timelife_confs:
            act, timelife = conf.split(":")
            max_time_in_queue[act.strip()] = int(timelife.strip())
    except NoOptionError:
        max_time_in_queue = {}
    if 'default' not in max_time_in_queue:
        max_time_in_queue['default'] = 168
    logging.debug("Maximum time in queue for different activities: %s" %
                  max_time_in_queue)

    logger_prefix = executable = 'conveyor-stager'
    if activities:
        activities.sort()
        executable += ' --activities ' + str(activities)

    if rses:
        rse_ids = [rse['id'] for rse in rses]
    else:
        rse_ids = None

    transfertool_kwargs = {
        FTS3Transfertool: {
            'group_policy': group_policy,
            'group_bulk': group_bulk,
            'source_strategy': source_strategy,
            'max_time_in_queue': max_time_in_queue,
            'bring_online': bring_online,
            'default_lifetime': -1,
        }
    }

    run_daemon(
        once=once,
        graceful_stop=graceful_stop,
        executable=executable,
        logger_prefix=logger_prefix,
        partition_wait_time=None,
        sleep_time=sleep_time,
        run_once_fnc=functools.partial(
            run_once,
            bulk=bulk,
            group_bulk=group_bulk,
            scheme=scheme,
            failover_scheme=failover_scheme,
            rse_ids=rse_ids,
            transfertool_kwargs=transfertool_kwargs,
        ),
        activities=activities,
    )
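
A hedged sketch of the max_time_in_queue parsing done at the top of stager(), run against a hypothetical configuration value. Each comma-separated entry is "<activity>:<hours>", and a 'default' entry is added when none is configured.

# Hedged sketch (assumed example value for the conveyor/max_time_in_queue option).
timelife_conf = 'Express:24,Data Brokering:48'
max_time_in_queue = {}
for conf in timelife_conf.split(','):
    act, timelife = conf.split(':')
    max_time_in_queue[act.strip()] = int(timelife.strip())
if 'default' not in max_time_in_queue:
    max_time_in_queue['default'] = 168
print(max_time_in_queue)  # {'Express': 24, 'Data Brokering': 48, 'default': 168}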
Example #35
def receiver(id, total_threads=1, full_mode=False):
    """
    Main loop to consume messages from the FTS3 producer.
    """

    logging.info('receiver starting in full mode: %s' % full_mode)

    executable = ' '.join(sys.argv)
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()

    heartbeat.sanity_check(executable=executable, hostname=hostname)
    # Make an initial heartbeat so that all finishers have the correct worker number on the next try
    heartbeat.live(executable, hostname, pid, hb_thread)

    brokers_alias = []
    brokers_resolved = []
    try:
        brokers_alias = [b.strip() for b in config_get('messaging-fts3', 'brokers').split(',')]
    except Exception:
        raise Exception('Could not load brokers from configuration')

    logging.info('resolving broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        addrinfos = socket.getaddrinfo(broker, 0, socket.AF_INET, 0, socket.IPPROTO_TCP)
        brokers_resolved.extend(ai[4][0] for ai in addrinfos)

    logging.info('brokers resolved to %s', brokers_resolved)

    logging.info('checking authentication method')
    use_ssl = True
    try:
        use_ssl = config_get_bool('messaging-fts3', 'use_ssl')
    except:
        logging.info('could not find use_ssl in configuration -- please update your rucio.cfg')

    port = config_get_int('messaging-fts3', 'port')
    vhost = config_get('messaging-fts3', 'broker_virtual_host', raise_exception=False)
    if not use_ssl:
        username = config_get('messaging-fts3', 'username')
        password = config_get('messaging-fts3', 'password')
        port = config_get_int('messaging-fts3', 'nonssl_port')

    conns = []
    for broker in brokers_resolved:
        if not use_ssl:
            logging.info('setting up username/password authentication: %s' % broker)
            con = stomp.Connection12(host_and_ports=[(broker, port)],
                                     use_ssl=False,
                                     vhost=vhost,
                                     reconnect_attempts_max=999)
        else:
            logging.info('setting up ssl cert/key authentication: %s' % broker)
            con = stomp.Connection12(host_and_ports=[(broker, port)],
                                     use_ssl=True,
                                     ssl_key_file=config_get('messaging-fts3', 'ssl_key_file'),
                                     ssl_cert_file=config_get('messaging-fts3', 'ssl_cert_file'),
                                     vhost=vhost,
                                     reconnect_attempts_max=999)
        conns.append(con)

    logging.info('receiver started')

    while not graceful_stop.is_set():

        heartbeat.live(executable, hostname, pid, hb_thread)

        for conn in conns:

            if not conn.is_connected():
                logging.info('connecting to %s' % conn.transport._Transport__host_and_ports[0][0])
                record_counter('daemons.messaging.fts3.reconnect.%s' % conn.transport._Transport__host_and_ports[0][0].split('.')[0])

                conn.set_listener('rucio-messaging-fts3', Receiver(broker=conn.transport._Transport__host_and_ports[0], id=id, total_threads=total_threads, full_mode=full_mode))
                conn.start()
                if not use_ssl:
                    conn.connect(username, password, wait=True)
                else:
                    conn.connect(wait=True)
                conn.subscribe(destination=config_get('messaging-fts3', 'destination'),
                               id='rucio-messaging-fts3',
                               ack='auto')

        time.sleep(1)

    logging.info('receiver graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except Exception:
            pass

    heartbeat.die(executable, hostname, pid, hb_thread)

    logging.info('receiver graceful stop done')
Example #36
def list_rebalance_rule_candidates(rse_id, mode=None, session=None):
    """
    List the rebalance rule candidates based on the agreed on specification
    :param rse_id:       RSE of the source.
    :param mode:         Rebalancing mode.
    :param session:      DB Session.
    """

    vo = get_rse_vo(rse_id=rse_id)

    # Dumps can only be applied for decommissioning, since the dumps don't contain info from DIDs
    if mode == 'decommission':
        return _list_rebalance_rule_candidates_dump(rse_id, mode)

    # If not decommissioning, use SQLAlchemy

    # Rules constraints. By default only moves rules in state OK that have no children and have only one copy
    # Additional constraints can be imposed by setting specific configuration
    rule_clause = [
        models.ReplicationRule.state == RuleState.OK,
        models.ReplicationRule.child_rule_id.is_(None),
        models.ReplicationRule.copies == 1
    ]

    # Only move rules w/o expiration date, or rules with expiration_date more than <min_expires_date_in_days> days in the future
    expiration_clause = models.ReplicationRule.expires_at.is_(None)
    min_expires_date_in_days = config_get_int(
        section='bb8',
        option='min_expires_date_in_days',
        raise_exception=False,
        default=-1,
        expiration_time=3600)
    if min_expires_date_in_days > 0:
        min_expires_date_in_days = datetime.utcnow() + timedelta(
            days=min_expires_date_in_days)
        expiration_clause = or_(
            models.ReplicationRule.expires_at > min_expires_date_in_days,
            models.ReplicationRule.expires_at.is_(None))
    rule_clause.append(expiration_clause)

    # Only move rules which were created more than <min_created_days> days ago
    min_created_days = config_get_int(section='bb8',
                                      option='min_created_days',
                                      raise_exception=False,
                                      default=-1,
                                      expiration_time=3600)
    if min_created_days > 0:
        min_created_days = datetime.now() - timedelta(days=min_created_days)
        rule_clause.append(
            models.ReplicationRule.created_at < min_created_days)

    # Only move rules which are owned by <allowed_accounts> (comma-separated accounts, e.g. panda,root,ddmadmin,jdoe)
    allowed_accounts = config_get(section='bb8',
                                  option='allowed_accounts',
                                  raise_exception=False,
                                  default=None,
                                  expiration_time=3600)
    if allowed_accounts:
        allowed_accounts = [
            InternalAccount(acc.strip(' '), vo=vo)
            for acc in allowed_accounts.split(',')
        ]
        rule_clause.append(
            models.ReplicationRule.account.in_(allowed_accounts))

    # Only move rules that have a certain grouping <allowed_grouping> (accepted values : all, dataset, none)
    rule_grouping_mapping = {
        'all': RuleGrouping.ALL,
        'dataset': RuleGrouping.DATASET,
        'none': RuleGrouping.NONE
    }
    allowed_grouping = config_get(section='bb8',
                                  option='allowed_grouping',
                                  raise_exception=False,
                                  default=None,
                                  expiration_time=3600)
    if allowed_grouping:
        rule_clause.append(models.ReplicationRule.grouping ==
                           rule_grouping_mapping.get(allowed_grouping))

    # DIDs constraints. By default only moves rules of DID where we can compute the size
    # Additional constraints can be imposed by setting specific configuration
    did_clause = [models.DataIdentifier.bytes.isnot(None)]

    type_to_did_type_mapping = {
        'all': [DIDType.CONTAINER, DIDType.DATASET, DIDType.FILE],
        'collection': [DIDType.CONTAINER, DIDType.DATASET],
        'container': [DIDType.CONTAINER],
        'dataset': [DIDType.DATASET],
        'file': [DIDType.FILE]
    }

    # Only allow migrating rules of a certain did_type <allowed_did_type> (accepted values: all, collection, container, dataset, file)
    allowed_did_type = config_get(section='bb8',
                                  option='allowed_did_type',
                                  raise_exception=False,
                                  default=None,
                                  expiration_time=3600)
    if allowed_did_type:
        allowed_did_type = [
            models.DataIdentifier.did_type == did_type
            for did_type in type_to_did_type_mapping.get(allowed_did_type)
        ]
        did_clause.append(or_(*allowed_did_type))

    # Only allow migrating rules of closed DIDs if <only_move_closed_did> is set
    only_move_closed_did = config_get_bool(section='bb8',
                                           option='only_move_closed_did',
                                           raise_exception=False,
                                           default=None,
                                           expiration_time=3600)
    if only_move_closed_did:
        did_clause.append(models.DataIdentifier.is_open == False)  # NOQA

    # Now build the query
    external_dsl = aliased(models.DatasetLock)
    count_locks = select([func.count()]).where(
        and_(external_dsl.scope == models.DatasetLock.scope,
             external_dsl.name == models.DatasetLock.name,
             external_dsl.rse_id == models.DatasetLock.rse_id)).as_scalar()
    query = session.query(models.DatasetLock.scope,
                          models.DatasetLock.name,
                          models.ReplicationRule.id,
                          models.ReplicationRule.rse_expression,
                          models.ReplicationRule.subscription_id,
                          models.DataIdentifier.bytes,
                          models.DataIdentifier.length,
                          case([(or_(models.DatasetLock.length < 1, models.DatasetLock.length.is_(None)), 0)],
                               else_=cast(models.DatasetLock.bytes / models.DatasetLock.length, BigInteger))).\
        join(models.ReplicationRule, models.ReplicationRule.id == models.DatasetLock.rule_id).\
        join(models.DataIdentifier, and_(models.DatasetLock.scope == models.DataIdentifier.scope, models.DatasetLock.name == models.DataIdentifier.name)).\
        filter(models.DatasetLock.rse_id == rse_id).\
        filter(and_(*rule_clause)).\
        filter(and_(*did_clause)).\
        filter(case([(or_(models.DatasetLock.length < 1, models.DatasetLock.length.is_(None)), 0)],
                    else_=cast(models.DatasetLock.bytes / models.DatasetLock.length, BigInteger)) > 1000000000).\
        filter(count_locks == 1)
    summary = query.order_by(
        case([(or_(models.DatasetLock.length < 1,
                   models.DatasetLock.length.is_(None)), 0)],
             else_=cast(models.DatasetLock.bytes / models.DatasetLock.length,
                        BigInteger)), models.DatasetLock.accessed_at).all()
    return summary
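
A hedged summary of the optional [bb8] configuration knobs consulted above; the sample values are illustrative only. Any option left unset simply disables the corresponding constraint on the rebalancing candidates.

# Hedged sketch: illustrative [bb8] options and what they constrain.
BB8_OPTIONS = {
    'min_expires_date_in_days': 30,       # skip rules expiring within the next 30 days
    'min_created_days': 180,              # only rules created more than 180 days ago
    'allowed_accounts': 'root,ddmadmin',  # comma-separated list of rule owners
    'allowed_grouping': 'dataset',        # one of: all, dataset, none
    'allowed_did_type': 'dataset',        # one of: all, collection, container, dataset, file
    'only_move_closed_did': True,         # restrict to closed DIDs
}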
Example #37
def receiver(id, total_threads=1, full_mode=False):
    """
    Main loop to consume messages from the FTS3 producer.
    """

    logging.info('receiver starting in full mode: %s' % full_mode)

    executable = ' '.join(sys.argv)
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()

    heartbeat.sanity_check(executable=executable, hostname=hostname)
    # Make an initial heartbeat so that all finishers have the correct worker number on the next try
    heartbeat.live(executable, hostname, pid, hb_thread)

    brokers_alias = []
    brokers_resolved = []
    try:
        brokers_alias = [
            b.strip()
            for b in config_get('messaging-fts3', 'brokers').split(',')
        ]
    except:
        raise Exception('Could not load brokers from configuration')

    logging.info('resolving broker dns alias: %s' % brokers_alias)

    brokers_resolved = []
    for broker in brokers_alias:
        brokers_resolved.append([
            str(tmp_broker) for tmp_broker in dns.resolver.query(broker, 'A')
        ])
    brokers_resolved = [
        item for sublist in brokers_resolved for item in sublist
    ]

    logging.info('brokers resolved to %s', brokers_resolved)

    conns = []
    for broker in brokers_resolved:
        conns.append(
            stomp.Connection(host_and_ports=[
                (broker, config_get_int('messaging-fts3', 'port'))
            ],
                             use_ssl=True,
                             ssl_key_file=config_get('messaging-fts3',
                                                     'ssl_key_file'),
                             ssl_cert_file=config_get('messaging-fts3',
                                                      'ssl_cert_file'),
                             ssl_version=ssl.PROTOCOL_TLSv1,
                             reconnect_attempts_max=999))

    logging.info('receiver started')

    while not graceful_stop.is_set():

        heartbeat.live(executable, hostname, pid, hb_thread)

        for conn in conns:

            if not conn.is_connected():
                logging.info('connecting to %s' %
                             conn.transport._Transport__host_and_ports[0][0])
                record_counter('daemons.messaging.fts3.reconnect.%s' %
                               conn.transport._Transport__host_and_ports[0]
                               [0].split('.')[0])

                conn.set_listener(
                    'rucio-messaging-fts3',
                    Receiver(
                        broker=conn.transport._Transport__host_and_ports[0],
                        id=id,
                        total_threads=total_threads,
                        full_mode=full_mode))
                conn.start()
                conn.connect()
                conn.subscribe(destination=config_get('messaging-fts3',
                                                      'destination'),
                               id='rucio-messaging-fts3',
                               ack='auto')

        time.sleep(1)

    logging.info('receiver graceful stop requested')

    for conn in conns:
        try:
            conn.disconnect()
        except:
            pass

    heartbeat.die(executable, hostname, pid, hb_thread)

    logging.info('receiver graceful stop done')
Example #38
from rucio.common.config import config_get, config_get_bool, config_get_int
from rucio.core import config as config_core
from rucio.core.rse import get_rse_id, get_rse_transfer_limits

queue_mode = config_get('conveyor', 'queue_mode', False, 'default')
if queue_mode.upper() == 'STRICT':
    queue_mode = 'strict'

config_memcache = config_get('conveyor', 'using_memcache', False, 'False')
if config_memcache.upper() == 'TRUE':
    using_memcache = True
else:
    using_memcache = False

REGION_SHORT = make_region_memcached(
    expiration_time=config_get_int('conveyor', 'cache_time', False, 600))


def get_transfer_limits(activity, rse_id, logger=logging.log):
    """
    Get RSE transfer limits.

    :param activity:  The activity.
    :param rse_id:    The RSE id.
    :param logger:    Optional decorated logger that can be passed from the calling daemons or servers.

    :returns: max_transfers if exists else None.
    """
    try:
        if queue_mode == 'strict':
            threshold = get_config_limit(activity, rse_id)