Ejemplo n.º 1
0
    def get(self, scope_name):
        """
        Return all metadata of one data identifier as a JSON document.

        .. :quickref: Meta; List DID metadata.

        The optional ``plugin`` query argument selects the metadata plugin
        handed to ``get_metadata``; it defaults to ``DID_COLUMN``.

        :resheader Content-Type: application/json
        :param scope_name: data identifier (scope)/(name).
        :status 200: DID found
        :status 400: scope_name could not be parsed
        :status 401: Invalid Auth Token
        :status 404: DID not found
        :status 406: Not Acceptable
        :returns: A dictionary containing all meta.
        """
        # The VO is needed both for parsing the DID and for the lookup.
        vo = request.environ.get('vo')

        try:
            scope, name = parse_scope_name(scope_name, vo)
        except ValueError as parse_error:
            return generate_http_error_flask(400, parse_error)

        selected_plugin = request.args.get('plugin', default='DID_COLUMN')
        try:
            did_meta = get_metadata(scope=scope, name=name, plugin=selected_plugin, vo=vo)
            body = render_json(**did_meta)
            return Response(body, content_type='application/json')
        except DataIdentifierNotFound as missing:
            return generate_http_error_flask(404, missing)
Ejemplo n.º 2
0
    def get(self, scope, name):
        """
        List all meta of a data identifier.

        .. :quickref: Meta; List DID metadata.

        :resheader Content-Type: application/json
        :param scope: The scope of the data identifier.
        :param name: The name of the data identifier.
        :status 200: DID found
        :status 401: Invalid Auth Token
        :status 404: DID not found
        :status 500: Database Exception
        :returns: A dictionary containing all meta.
        """
        try:
            meta = get_metadata(scope=scope, name=name)
            return Response(render_json(**meta),
                            content_type='application/json')
        except DataIdentifierNotFound as error:
            return generate_http_error_flask(404, 'DataIdentifierNotFound',
                                             error.args[0])
        except RucioException as error:
            # Any other Rucio error is reported under its own class name.
            return generate_http_error_flask(500, error.__class__.__name__,
                                             error.args[0])
        except Exception as error:
            # Call form of print: valid on both Python 2 and Python 3
            # (the statement form is a SyntaxError on Python 3).
            print(format_exc())
            # A view must return a string/bytes/Response body, not the
            # exception instance itself, so send its text representation.
            return str(error), 500
Ejemplo n.º 3
0
Archivo: did.py Proyecto: yiiyama/rucio
    def GET(self, scope, name):
        """
        Serve the complete metadata of a data identifier as JSON.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            404 DataIdentifierNotFound
            406 Not Acceptable
            500 InternalError

        :param scope: The scope name.
        :param name: The data identifier name.

        :returns: A dictionary containing all meta.
        """
        header('Content-Type', 'application/json')
        try:
            return render_json(**get_metadata(scope=scope, name=name))
        except DataIdentifierNotFound as not_found:
            # Unknown DID -> 404 carrying the first exception argument.
            raise generate_http_error(404, 'DataIdentifierNotFound', not_found.args[0])
        except RucioException as rucio_error:
            # Remaining Rucio errors -> 500 labelled with the exception class.
            raise generate_http_error(500, rucio_error.__class__.__name__, rucio_error.args[0])
        except Exception as unexpected:
            # Anything else: dump the traceback and raise InternalError.
            print(format_exc())
            raise InternalError(unexpected)
Ejemplo n.º 4
0
def cache_add_replicas(rse, files, account, lifetime):
    """
    Rucio Cache add replicas.

    Each entry of ``files`` is checked against the catalogue metadata
    returned by ``get_metadata`` and, when size and checksum agree,
    registered on ``rse`` through ``add_replicas``. Entries that fail a
    check are logged and skipped; processing continues with the next one.

    :param rse: RSE handed to ``add_replicas``.
    :param files: Iterable of dicts with the keys ``scope``, ``name``,
                  ``bytes`` and ``adler32``.
    :param account: Account passed to ``add_replicas`` as ``issuer``.
    :param lifetime: Accepted for interface compatibility; not used here.
    :returns: 0 when every entry was added, otherwise the code of the
              last problem seen (``DID_NOT_FOUND``, ``META_MISMATCH`` or
              ``ADD_REPLICA_ERROR``).
    """
    return_code = 0
    for entry in files:
        scope, name = entry["scope"], entry["name"]

        # check metadata
        try:
            metadata = get_metadata(scope, name)
        except exception.DataIdentifierNotFound:
            logging.error("%s:%s not found. Skip to add it to replicas", scope, name)
            logging.error(format_exc())
            return_code = DID_NOT_FOUND
            continue

        size_differs = int(metadata["bytes"]) != int(entry["bytes"])
        if size_differs or metadata["adler32"] != entry["adler32"]:
            logging.error("%s:%s(bytes:%s, adler32:%s) has different size or checksum with metadata(bytes:%s, adler32:%s). Skip to add it to replicas",
                          scope, name, entry["bytes"], entry["adler32"], metadata["bytes"], metadata["adler32"])
            return_code = META_MISMATCH
            continue

        # add replica
        try:
            add_replicas(rse, [entry], issuer=account)
        except exception.Duplicate:
            # logging.warn is a deprecated alias; use logging.warning.
            logging.warning("%s:%s already exists in %s with error details: %s",
                            scope, name, rse, format_exc())
            return_code = ADD_REPLICA_ERROR

    return return_code
Ejemplo n.º 5
0
    def setup(self):
        """
        RucioCache (Func): Find necessary rse and dids.

        Creates one volatile and one non-volatile RSE whose names carry a
        random numeric suffix, reserves the name of an RSE that is never
        created, and builds the file lists used by the tests from the
        file DIDs found in scope ``mock``.
        """
        self.id = int(random.random() * 10000)
        suffix = str(self.id)

        self.rse_exist_volatile = 'RUCIO_CACHE_VOLATILE' + suffix
        try:
            rse.add_rse(self.rse_exist_volatile, 'root', deterministic=True, volatile=True)
        except exception.Duplicate:
            logging.warning("rse RUCIO_CACHE_VOLATILE already there")

        self.rse_exist_novolatile = 'RUCIO_CACHE_NOVOLATILE' + suffix
        try:
            rse.add_rse(self.rse_exist_novolatile, 'root', deterministic=True, volatile=False)
        except exception.Duplicate:
            logging.warning("rse RUCIO_CACHE_NOVOLATILE already there")

        # Name only; add_rse is never called for it in this method.
        self.rse_noExist = 'RUCIO_CACHE_NOEXIST' + suffix

        dids = did.list_dids(scope='mock', filters={}, type='file')
        i = 0
        self.files_exist = []
        self.files_exist_wrong_meta = []
        self.file_replica_on_novolatile = []
        for _did in dids:
            if i < 2:
                # First two DIDs: recorded once with their real metadata and
                # once with deliberately wrong size/checksum.
                i += 1
                meta = did.get_metadata(scope='mock', name=_did[0])
                self.files_exist.append({'scope': meta['scope'], 'name': meta['name'], 'bytes': meta['bytes'], "adler32": meta["adler32"]})
                self.files_exist_wrong_meta.append({'scope': meta['scope'], 'name': meta['name'], 'bytes': 12345678, "adler32": '12345678'})
            elif i < 3:
                # NOTE(review): i is not incremented in this branch, so every
                # remaining DID gets a replica on the non-volatile RSE.
                # Confirm whether only one was intended.
                meta = did.get_metadata(scope='mock', name=_did[0])
                replica_file = {'scope': meta['scope'], 'name': meta['name'], 'bytes': meta['bytes'], "adler32": meta["adler32"]}
                self.file_replica_on_novolatile.append(replica_file)
                replica.add_replicas(self.rse_exist_novolatile, [replica_file], account='root')

        logging.debug("File Exists: %s " % self.files_exist)
        logging.debug("File Exists with wrong metadata: %s " % self.files_exist_wrong_meta)
        # The placeholder was missing, so the list never appeared in the log.
        logging.debug("File Exists on volatie rses: %s " % self.file_replica_on_novolatile)

        self.files_noExist = [{'scope': 'mock', 'name': 'file_notexist', "bytes": 1, "adler32": "0cc737eb"}]
        logging.debug("File not Exists: %s " % self.files_noExist)
        self.account = 'root'
        self.lifetime = 2
Ejemplo n.º 6
0
    def GET(self, scope, name):
        """
        List all meta of a data identifier.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            404 DataIdentifierNotFound
            500 InternalError

        Only the 404 case is produced by this method body; any other
        exception from the lookup propagates to the caller unchanged.

        :param scope: The scope name.
        :param name: The data identifier name.

        :returns: A dictionary containing all meta.
        """
        header('Content-Type', 'application/json')
        try:
            meta = get_metadata(scope=scope, name=name)
            return render_json(**meta)
        # "except X as e" works on Python 2.6+ and 3; the comma form is a
        # SyntaxError on Python 3.
        except DataIdentifierNotFound as error:
            # presumably args[0] is itself a sequence whose first item is the
            # message; kept as in the original, verify against the raiser.
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0][0])
Ejemplo n.º 7
0
def transmogrifier(bulk=5, once=False, sleep_time=60):
    """
    Creates a Transmogrifier Worker that gets a list of new DIDs for a given hash,
    identifies the subscriptions matching the DIDs and
    submit a replication rule for each DID matching a subscription.

    :param thread: Thread number at startup.
    :param bulk: The number of requests to process.
    :param once: Run only once.
    :param sleep_time: Time between two cycles.
    """

    executable = ' '.join(argv)
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)

    while not graceful_stop.is_set():

        heart_beat = heartbeat.live(executable, hostname, pid, hb_thread)

        dids, subscriptions = [], []
        tottime = 0
        prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] + 1,
                                             heart_beat['nr_threads'])

        try:
            #  Get the new DIDs based on the is_new flag
            for did in list_new_dids(thread=heart_beat['assign_thread'],
                                     total_threads=heart_beat['nr_threads'],
                                     chunk_size=bulk):
                dids.append({
                    'scope': did['scope'],
                    'did_type': str(did['did_type']),
                    'name': did['name']
                })

            sub_dict = {3: []}
            #  Get the list of subscriptions. The default priority of the subscription is 3. 0 is the highest priority, 5 the lowest
            #  The priority is defined as 'policyid'
            for sub in list_subscriptions(None, None):
                if sub['state'] != SubscriptionState.INACTIVE and sub[
                        'lifetime'] and (datetime.now() > sub['lifetime']):
                    update_subscription(
                        name=sub['name'],
                        account=sub['account'],
                        metadata={'state': SubscriptionState.INACTIVE},
                        issuer='root')

                elif sub['state'] in [
                        SubscriptionState.ACTIVE, SubscriptionState.UPDATED
                ]:
                    priority = 3
                    if 'policyid' in sub:
                        if int(sub['policyid']) not in sub_dict:
                            sub_dict[int(sub['policyid'])] = []
                        priority = int(sub['policyid'])
                    sub_dict[priority].append(sub)
            priorities = list(sub_dict.keys())
            priorities.sort()
            #  Order the subscriptions according to their priority
            for priority in priorities:
                subscriptions.extend(sub_dict[priority])
        except SubscriptionNotFound as error:
            logging.warning(prepend_str + 'No subscriptions defined: %s' %
                            (str(error)))
            time.sleep(10)
            continue
        except Exception as error:
            logging.error(
                prepend_str +
                'Failed to get list of new DIDs or subscriptions: %s' %
                (str(error)))

        try:
            results = {}
            start_time = time.time()
            blacklisted_rse_id = [
                rse['id'] for rse in list_rses({'availability_write': False})
            ]
            logging.debug(prepend_str + 'In transmogrifier worker')
            identifiers = []
            #  Loop over all the new dids
            for did in dids:
                did_success = True
                if did['did_type'] == str(
                        DIDType.DATASET) or did['did_type'] == str(
                            DIDType.CONTAINER):
                    results['%s:%s' % (did['scope'], did['name'])] = []
                    try:
                        metadata = get_metadata(did['scope'], did['name'])
                        # Loop over all the subscriptions
                        for subscription in subscriptions:
                            #  Check if the DID match the subscription
                            if is_matching_subscription(
                                    subscription, did, metadata) is True:
                                filter_string = loads(subscription['filter'])
                                split_rule = filter_string.get(
                                    'split_rule', False)
                                if split_rule == 'true':
                                    split_rule = True
                                elif split_rule == 'false':
                                    split_rule = False
                                stime = time.time()
                                results['%s:%s' %
                                        (did['scope'], did['name'])].append(
                                            subscription['id'])
                                logging.info(prepend_str +
                                             '%s:%s matches subscription %s' %
                                             (did['scope'], did['name'],
                                              subscription['name']))
                                for rule_string in loads(
                                        subscription['replication_rules']):
                                    # Get all the rule and subscription parameters
                                    grouping = rule_string.get(
                                        'grouping', 'DATASET')
                                    lifetime = rule_string.get(
                                        'lifetime', None)
                                    ignore_availability = rule_string.get(
                                        'ignore_availability', None)
                                    weight = rule_string.get('weight', None)
                                    source_replica_expression = rule_string.get(
                                        'source_replica_expression', None)
                                    locked = rule_string.get('locked', None)
                                    if locked == 'True':
                                        locked = True
                                    else:
                                        locked = False
                                    purge_replicas = rule_string.get(
                                        'purge_replicas', False)
                                    if purge_replicas == 'True':
                                        purge_replicas = True
                                    else:
                                        purge_replicas = False
                                    rse_expression = str(
                                        rule_string['rse_expression'])
                                    comment = str(subscription['comments'])
                                    subscription_id = str(subscription['id'])
                                    account = subscription['account']
                                    copies = int(rule_string['copies'])
                                    activity = rule_string.get(
                                        'activity', 'User Subscriptions')
                                    try:
                                        validate_schema(name='activity',
                                                        obj=activity)
                                    except InputValidationError as error:
                                        logging.error(
                                            prepend_str +
                                            'Error validating the activity %s'
                                            % (str(error)))
                                        activity = 'User Subscriptions'
                                    if lifetime:
                                        lifetime = int(lifetime)

                                    str_activity = "".join(activity.split())
                                    success = False
                                    nattempt = 5
                                    attemptnr = 0
                                    skip_rule_creation = False

                                    if split_rule:
                                        rses = parse_expression(rse_expression)
                                        list_of_rses = [
                                            rse['rse'] for rse in rses
                                        ]
                                        # Check that some rule doesn't already exist for this DID and subscription
                                        preferred_rse_ids = []
                                        for rule in list_rules(
                                                filters={
                                                    'subscription_id':
                                                    subscription_id,
                                                    'scope': did['scope'],
                                                    'name': did['name']
                                                }):
                                            already_existing_rses = [
                                                (rse['rse'], rse['id'])
                                                for rse in parse_expression(
                                                    rule['rse_expression'])
                                            ]
                                            for rse, rse_id in already_existing_rses:
                                                if (rse in list_of_rses) and (
                                                        rse_id not in
                                                        preferred_rse_ids):
                                                    preferred_rse_ids.append(
                                                        rse_id)
                                        if len(preferred_rse_ids) >= copies:
                                            skip_rule_creation = True

                                        rse_id_dict = {}
                                        for rse in rses:
                                            rse_id_dict[rse['id']] = rse['rse']
                                        try:
                                            rseselector = RSESelector(
                                                account=account,
                                                rses=rses,
                                                weight=weight,
                                                copies=copies -
                                                len(preferred_rse_ids))
                                            selected_rses = [
                                                rse_id_dict[rse_id] for rse_id,
                                                _, _ in rseselector.select_rse(
                                                    0,
                                                    preferred_rse_ids=
                                                    preferred_rse_ids,
                                                    copies=copies,
                                                    blacklist=blacklisted_rse_id
                                                )
                                            ]
                                        except (InsufficientTargetRSEs,
                                                InsufficientAccountLimit,
                                                InvalidRuleWeight,
                                                RSEOverQuota) as error:
                                            logging.warning(
                                                prepend_str +
                                                'Problem getting RSEs for subscription "%s" for account %s : %s. Try including blacklisted sites'
                                                % (subscription['name'],
                                                   account, str(error)))
                                            # Now including the blacklisted sites
                                            try:
                                                rseselector = RSESelector(
                                                    account=account,
                                                    rses=rses,
                                                    weight=weight,
                                                    copies=copies -
                                                    len(preferred_rse_ids))
                                                selected_rses = [
                                                    rse_id_dict[rse_id]
                                                    for rse_id, _, _ in
                                                    rseselector.select_rse(
                                                        0,
                                                        preferred_rse_ids=
                                                        preferred_rse_ids,
                                                        copies=copies,
                                                        blacklist=[])
                                                ]
                                                ignore_availability = True
                                            except (InsufficientTargetRSEs,
                                                    InsufficientAccountLimit,
                                                    InvalidRuleWeight,
                                                    RSEOverQuota) as error:
                                                logging.error(
                                                    prepend_str +
                                                    'Problem getting RSEs for subscription "%s" for account %s : %s. Skipping rule creation.'
                                                    % (subscription['name'],
                                                       account, str(error)))
                                                monitor.record_counter(
                                                    counters=
                                                    'transmogrifier.addnewrule.errortype.%s'
                                                    % (str(error.__class__.
                                                           __name__)),
                                                    delta=1)
                                                # The DID won't be reevaluated at the next cycle
                                                did_success = did_success and True
                                                continue

                                    for attempt in range(0, nattempt):
                                        attemptnr = attempt
                                        nb_rule = 0
                                        #  Try to create the rule
                                        try:
                                            if split_rule:
                                                if not skip_rule_creation:
                                                    for rse in selected_rses:
                                                        logging.info(
                                                            prepend_str +
                                                            'Will insert one rule for %s:%s on %s'
                                                            %
                                                            (did['scope'],
                                                             did['name'], rse))
                                                        add_rule(
                                                            dids=[{
                                                                'scope':
                                                                did['scope'],
                                                                'name':
                                                                did['name']
                                                            }],
                                                            account=account,
                                                            copies=1,
                                                            rse_expression=rse,
                                                            grouping=grouping,
                                                            weight=weight,
                                                            lifetime=lifetime,
                                                            locked=locked,
                                                            subscription_id=
                                                            subscription_id,
                                                            source_replica_expression
                                                            =source_replica_expression,
                                                            activity=activity,
                                                            purge_replicas=
                                                            purge_replicas,
                                                            ignore_availability=
                                                            ignore_availability,
                                                            comment=comment)

                                                        nb_rule += 1
                                                        if nb_rule == copies:
                                                            success = True
                                                            break
                                            else:
                                                add_rule(
                                                    dids=[{
                                                        'scope': did['scope'],
                                                        'name': did['name']
                                                    }],
                                                    account=account,
                                                    copies=copies,
                                                    rse_expression=
                                                    rse_expression,
                                                    grouping=grouping,
                                                    weight=weight,
                                                    lifetime=lifetime,
                                                    locked=locked,
                                                    subscription_id=
                                                    subscription['id'],
                                                    source_replica_expression=
                                                    source_replica_expression,
                                                    activity=activity,
                                                    purge_replicas=
                                                    purge_replicas,
                                                    ignore_availability=
                                                    ignore_availability,
                                                    comment=comment)
                                                nb_rule += 1
                                            monitor.record_counter(
                                                counters=
                                                'transmogrifier.addnewrule.done',
                                                delta=nb_rule)
                                            monitor.record_counter(
                                                counters=
                                                'transmogrifier.addnewrule.activity.%s'
                                                % str_activity,
                                                delta=nb_rule)
                                            success = True
                                            break
                                        except (InvalidReplicationRule,
                                                InvalidRuleWeight,
                                                InvalidRSEExpression,
                                                StagingAreaRuleRequiresLifetime,
                                                DuplicateRule) as error:
                                            # Errors that won't be retried
                                            success = True
                                            logging.error(prepend_str + '%s' %
                                                          (str(error)))
                                            monitor.record_counter(
                                                counters=
                                                'transmogrifier.addnewrule.errortype.%s'
                                                % (str(
                                                    error.__class__.__name__)),
                                                delta=1)
                                            break
                                        except (ReplicationRuleCreationTemporaryFailed,
                                                InsufficientTargetRSEs,
                                                InsufficientAccountLimit,
                                                DatabaseException,
                                                RSEBlacklisted) as error:
                                            # Errors to be retried
                                            logging.error(
                                                prepend_str +
                                                '%s Will perform an other attempt %i/%i'
                                                % (str(error), attempt + 1,
                                                   nattempt))
                                            monitor.record_counter(
                                                counters=
                                                'transmogrifier.addnewrule.errortype.%s'
                                                % (str(
                                                    error.__class__.__name__)),
                                                delta=1)
                                        except Exception as error:
                                            # Unexpected errors
                                            monitor.record_counter(
                                                counters=
                                                'transmogrifier.addnewrule.errortype.unknown',
                                                delta=1)
                                            exc_type, exc_value, exc_traceback = exc_info(
                                            )
                                            logging.critical(
                                                prepend_str + ''.join(
                                                    format_exception(
                                                        exc_type, exc_value,
                                                        exc_traceback)).strip(
                                                        ))

                                    did_success = (did_success and success)
                                    if (attemptnr +
                                            1) == nattempt and not success:
                                        logging.error(
                                            prepend_str +
                                            'Rule for %s:%s on %s cannot be inserted'
                                            % (did['scope'], did['name'],
                                               rse_expression))
                                    else:
                                        logging.info(
                                            prepend_str +
                                            '%s rule(s) inserted in %f seconds'
                                            % (str(nb_rule),
                                               time.time() - stime))
                    except DataIdentifierNotFound as error:
                        logging.warning(prepend_str + error)

                if did_success:
                    if did['did_type'] == str(DIDType.FILE):
                        monitor.record_counter(
                            counters='transmogrifier.did.file.processed',
                            delta=1)
                    elif did['did_type'] == str(DIDType.DATASET):
                        monitor.record_counter(
                            counters='transmogrifier.did.dataset.processed',
                            delta=1)
                    elif did['did_type'] == str(DIDType.CONTAINER):
                        monitor.record_counter(
                            counters='transmogrifier.did.container.processed',
                            delta=1)
                    monitor.record_counter(
                        counters='transmogrifier.did.processed', delta=1)
                    identifiers.append({
                        'scope':
                        did['scope'],
                        'name':
                        did['name'],
                        'did_type':
                        DIDType.from_sym(did['did_type'])
                    })

            time1 = time.time()

            #  Mark the DIDs as processed
            for identifier in chunks(identifiers, 100):
                _retrial(set_new_dids, identifier, None)

            logging.info(prepend_str + 'Time to set the new flag : %f' %
                         (time.time() - time1))
            tottime = time.time() - start_time
            for sub in subscriptions:
                update_subscription(
                    name=sub['name'],
                    account=sub['account'],
                    metadata={'last_processed': datetime.now()},
                    issuer='root')
            logging.info(prepend_str +
                         'It took %f seconds to process %i DIDs' %
                         (tottime, len(dids)))
            logging.debug(prepend_str + 'DIDs processed : %s' % (str(dids)))
            monitor.record_counter(counters='transmogrifier.job.done', delta=1)
            monitor.record_timer(stat='transmogrifier.job.duration',
                                 time=1000 * tottime)
        except Exception:
            exc_type, exc_value, exc_traceback = exc_info()
            logging.critical(prepend_str + ''.join(
                format_exception(exc_type, exc_value, exc_traceback)).strip())
            monitor.record_counter(counters='transmogrifier.job.error',
                                   delta=1)
            monitor.record_counter(counters='transmogrifier.addnewrule.error',
                                   delta=1)
        if once is True:
            break
        if tottime < sleep_time:
            logging.info(prepend_str + 'Will sleep for %s seconds' %
                         (sleep_time - tottime))
            time.sleep(sleep_time - tottime)
    heartbeat.die(executable, hostname, pid, hb_thread)
    logging.info(prepend_str + 'Graceful stop requested')
    logging.info(prepend_str + 'Graceful stop done')
Ejemplo n.º 8
0
    def setup(self):
        """RucioCache (Func): Find necessary rse and dids.

        Prepares the fixture state stored on ``self``:

        * ``id``: random integer in [0, 10000) appended to the RSE names.
        * ``rse_exist_volatile`` / ``rse_exist_novolatile``: names of the two
          RSEs registered here (a ``Duplicate`` error on registration is only
          logged).
        * ``rse_noExist``: an RSE name that is never registered.
        * ``files_exist``: scope/name/bytes/adler32 of the first two 'file'
          DIDs listed for scope 'mock'.
        * ``files_exist_wrong_meta``: the same two DIDs with bogus size and
          checksum.
        * ``file_replica_on_novolatile``: DIDs for which a replica is added on
          the non-volatile RSE.
        * ``files_noExist``: a single hard-coded file entry named
          'file_notexist'.
        * ``account`` / ``lifetime``: constants 'root' and 2.
        """

        def _file_entry(meta):
            # Keep only the fields the cache fixtures carry for a file.
            return {
                'scope': meta['scope'],
                'name': meta['name'],
                'bytes': meta['bytes'],
                "adler32": meta["adler32"]
            }

        self.id = int(random.random() * 10000)
        suffix = str(self.id)

        self.rse_exist_volatile = 'RUCIO_CACHE_VOLATILE' + suffix
        try:
            rse.add_rse(self.rse_exist_volatile,
                        'root',
                        deterministic=True,
                        volatile=True)
        except exception.Duplicate:
            logging.warning("rse RUCIO_CACHE_VOLATILE already there")

        self.rse_exist_novolatile = 'RUCIO_CACHE_NOVOLATILE' + suffix
        try:
            rse.add_rse(self.rse_exist_novolatile,
                        'root',
                        deterministic=True,
                        volatile=False)
        except exception.Duplicate:
            logging.warning("rse RUCIO_CACHE_NOVOLATILE already there")

        self.rse_noExist = 'RUCIO_CACHE_NOEXIST' + suffix

        dids = did.list_dids(scope='mock', filters={}, type='file')
        i = 0
        self.files_exist = []
        self.files_exist_wrong_meta = []
        self.file_replica_on_novolatile = []
        for _did in dids:
            if i < 2:
                # The first two DIDs become the "existing files" fixtures.
                i += 1
                meta = did.get_metadata(scope='mock', name=_did[0])
                self.files_exist.append(_file_entry(meta))
                self.files_exist_wrong_meta.append({
                    'scope': meta['scope'],
                    'name': meta['name'],
                    'bytes': 12345678,
                    "adler32": '12345678'
                })
            elif i < 3:
                # NOTE(review): `i` is not incremented in this branch, so it
                # stays at 2 and EVERY remaining DID gets a replica on the
                # non-volatile RSE. The `i < 3` bound suggests only one was
                # intended -- confirm before changing, tests may rely on it.
                meta = did.get_metadata(scope='mock', name=_did[0])
                replica_file = _file_entry(meta)
                self.file_replica_on_novolatile.append(replica_file)
                replica.add_replicas(self.rse_exist_novolatile,
                                     [replica_file],
                                     account='root')

        logging.debug("File Exists: %s ", self.files_exist)
        logging.debug("File Exists with wrong metadata: %s ",
                      self.files_exist_wrong_meta)
        # The original message had no %s placeholder, so the list was never
        # written to the log; it also mislabelled the RSE as volatile.
        logging.debug("File Exists on non-volatile rses: %s ",
                      self.file_replica_on_novolatile)

        self.files_noExist = [{
            'scope': 'mock',
            'name': 'file_notexist',
            "bytes": 1,
            "adler32": "0cc737eb"
        }]
        logging.debug("File not Exists: %s ", self.files_noExist)
        self.account = 'root'
        self.lifetime = 2
Ejemplo n.º 9
0
# Worker pass over `dids`: for every dataset/container DID, collect the ids of
# the subscriptions that match it. NOTE: this excerpt ends mid-statement.
try:
    # Maps 'scope:name' -> list of ids of the subscriptions that matched.
    results = {}
    start_time = time.time()
    # Ids of the RSEs that list_rses returns for availability_write=False.
    blacklisted_rse_id = [
        rse['id'] for rse in list_rses({'availability_write': False})
    ]
    logging.debug(prepend_str + 'In transmogrifier worker')
    identifiers = []
    for did in dids:
        did_success = True
        # Only DATASET and CONTAINER DIDs are checked against subscriptions.
        if did['did_type'] == str(
                DIDType.DATASET) or did['did_type'] == str(
                    DIDType.CONTAINER):
            results['%s:%s' % (did['scope'], did['name'])] = []
            try:
                metadata = get_metadata(did['scope'], did['name'])
                for subscription in subscriptions:
                    if is_matching_subscription(
                            subscription, did, metadata) is True:
                        # The subscription filter is decoded with loads()
                        # before its options are read.
                        filter = loads(subscription['filter'])
                        # 'split_rule' may be the string 'true'/'false';
                        # normalise those two spellings to a real boolean.
                        split_rule = filter.get('split_rule', False)
                        if split_rule == 'true':
                            split_rule = True
                        elif split_rule == 'false':
                            split_rule = False
                        # Timestamp taken when this subscription matched.
                        stime = time.time()
                        results['%s:%s' %
                                (did['scope'], did['name'])].append(
                                    subscription['id'])
                        logging.info(prepend_str +
                                     '%s:%s matches subscription %s' %
Ejemplo n.º 10
0
def transmogrifier(worker_number=1, total_workers=1, chunk_size=5, once=False):
    """
    Creates a Transmogrifier Worker that gets a list of new DIDs for a given hash, identifies the subscriptions matching the DIDs and submit a replication rule for each DID matching a subscription.

    param worker_number: The number of the worker (thread).
    param total_number: The total number of workers (threads).
    chunk_size: The chunk of the size to process.
    once: To run only once
    """
    while not graceful_stop.is_set():
        dids, subscriptions = [], []
        tottime = 0
        try:
            for did in list_new_dids(worker_number=worker_number, total_workers=total_workers, chunk_size=chunk_size):
                d = {"scope": did["scope"], "did_type": str(did["did_type"]), "name": did["name"]}
                dids.append(d)
            for sub in list_subscriptions(None, None):
                if sub["state"] in [SubscriptionState.ACTIVE, SubscriptionState.UPDATED]:
                    subscriptions.append(sub)
        except:
            logging.error("Thread %i : Failed to get list of new DIDs or subsscriptions" % (worker_number))
            if once:
                break
            else:
                continue

        try:
            results = {}
            start_time = time.time()
            logging.debug("Thread %i : In transmogrifier worker" % (worker_number))
            identifiers = []
            for did in dids:
                if did["did_type"] == str(DIDType.DATASET) or did["did_type"] == str(DIDType.CONTAINER):
                    results["%s:%s" % (did["scope"], did["name"])] = []
                    try:
                        metadata = get_metadata(did["scope"], did["name"])
                        for subscription in subscriptions:
                            if is_matching_subscription(subscription, did, metadata) is True:
                                stime = time.time()
                                results["%s:%s" % (did["scope"], did["name"])].append(subscription["id"])
                                logging.info(
                                    "Thread %i : %s:%s matches subscription %s"
                                    % (worker_number, did["scope"], did["name"], subscription["name"])
                                )
                                for rule in loads(subscription["replication_rules"]):
                                    grouping = rule.get("grouping", "DATASET")
                                    lifetime = rule.get("lifetime", None)
                                    if lifetime:
                                        lifetime = int(lifetime)
                                    weight = rule.get("weight", None)
                                    source_replica_expression = rule.get("source_replica_expression", None)
                                    activity = rule.get("activity", None)
                                    try:
                                        rse_expression = str(rule["rse_expression"]).encode("string-escape")
                                        add_rule(
                                            dids=[{"scope": did["scope"], "name": did["name"]}],
                                            account=subscription["account"],
                                            copies=int(rule["copies"]),
                                            rse_expression=rse_expression,
                                            grouping=grouping,
                                            weight=weight,
                                            lifetime=lifetime,
                                            locked=False,
                                            subscription_id=subscription["id"],
                                            source_replica_expression=source_replica_expression,
                                            activity=activity,
                                        )
                                        monitor.record_counter(counters="transmogrifier.addnewrule.done", delta=1)
                                        if subscription["name"].find("test") > -1:
                                            monitor.record_counter(
                                                counters="transmogrifier.addnewrule.activity.test", delta=1
                                            )
                                        elif subscription["name"].startswith("group"):
                                            monitor.record_counter(
                                                counters="transmogrifier.addnewrule.activity.group", delta=1
                                            )
                                        elif subscription["name"].startswith("tier0export"):
                                            monitor.record_counter(
                                                counters="transmogrifier.addnewrule.activity.tier0export", delta=1
                                            )
                                        elif subscription["name"].endswith("export"):
                                            monitor.record_counter(
                                                counters="transmogrifier.addnewrule.activity.dataconsolidation", delta=1
                                            )
                                        else:
                                            monitor.record_counter(
                                                counters="transmogrifier.addnewrule.activity.other", delta=1
                                            )
                                    except InvalidReplicationRule, e:
                                        logging.error("Thread %i : %s" % (worker_number, str(e)))
                                        monitor.record_counter(
                                            counters="transmogrifier.addnewrule.errortype.InvalidReplicationRule",
                                            delta=1,
                                        )
                                    except InvalidRuleWeight, e:
                                        logging.error("Thread %i : %s" % (worker_number, str(e)))
                                        monitor.record_counter(
                                            counters="transmogrifier.addnewrule.errortype.InvalidRuleWeight", delta=1
                                        )
                                    except InvalidRSEExpression, e:
                                        logging.error("Thread %i : %s" % (worker_number, str(e)))
                                        monitor.record_counter(
                                            counters="transmogrifier.addnewrule.errortype.InvalidRSEExpression", delta=1
                                        )
                                    except StagingAreaRuleRequiresLifetime, e:
                                        logging.error("Thread %i : %s" % (worker_number, str(e)))
                                        monitor.record_counter(
                                            counters="transmogrifier.addnewrule.errortype.StagingAreaRuleRequiresLifetime",
                                            delta=1,
                                        )
                                    except ReplicationRuleCreationTemporaryFailed, e:
                                        # Should never occur. Just for completness
                                        logging.error("Thread %i : %s" % (worker_number, str(e)))
                                        monitor.record_counter(
                                            counters="transmogrifier.addnewrule.errortype.ReplicationRuleCreationTemporaryFailed",
                                            delta=1,
                                        )