Exemple #1
0
    def test_touch_replicas(self):
        """ REPLICA (CORE): Touch replicas accessed_at timestamp

        Registers two batches of replicas on the MOCK RSE, touches a single
        replica and checks that only this replica (and its DID) received an
        access time while the other replicas still report none.
        """
        scope = 'mock'
        rse_name = 'MOCK'
        nbfiles = 5

        def new_file():
            # Fresh file description with a unique name.
            return {'scope': scope,
                    'name': 'file_%s' % generate_uuid(),
                    'bytes': 1,
                    'adler32': '0cc737eb',
                    'meta': {'events': 10}}

        def replica_atime(entry):
            # Access time currently stored for the replica of `entry` on the RSE.
            return get_replica_atime({'scope': entry['scope'], 'name': entry['name'], 'rse': rse_name})

        first_batch = [new_file() for _ in range(nbfiles)]
        second_batch = [new_file() for _ in range(nbfiles)]
        touched = first_batch[0]
        # The touched file is deliberately part of both batches.
        second_batch.append(touched)
        for batch in (first_batch, second_batch):
            add_replicas(rse=rse_name, files=batch, account='root', ignore_availability=True)

        # Drop the sub-second part of the timestamp before comparing
        # (presumably the stored value has second resolution - TODO confirm).
        now = datetime.utcnow().replace(microsecond=0)

        # Nothing has been accessed yet.
        assert_equal(None, replica_atime(touched))
        assert_equal(None, get_did_atime(scope=scope, name=touched['name']))

        touch_replica({'scope': touched['scope'], 'name': touched['name'], 'rse': rse_name, 'accessed_at': now})

        # Both the replica and the DID carry the new access time.
        assert_equal(now, replica_atime(touched))
        assert_equal(now, get_did_atime(scope=scope, name=touched['name']))

        # Every other replica of the first batch is unaffected ...
        for untouched in first_batch[1:]:
            assert_equal(None, replica_atime(untouched))

        # ... and so are the first nbfiles - 1 replicas of the second batch.
        for untouched in second_batch[:nbfiles - 1]:
            assert_equal(None, replica_atime(untouched))
Exemple #2
0
    def __update_atime(self):
        """
        Bulk update atime.

        Processes every trace report buffered in ``self.__reports``:

        * reports of unsuccessful ``get``/``get_sm``/``get_sm_a`` transfers
          whose ``stateReason`` matches one of ``self.__bad_files_patterns``
          are declared as suspicious replicas;
        * ``get*``/``sm_get*``/``download``/``touch`` reports are converted
          into replica entries that are later handed to ``touch_replica``;
        * dataset-level ``touch`` reports (those carrying ``datasetScope``)
          only put the dataset on ``self.__dataset_queue``;
        * for file-level reports the parent datasets of the file are put on
          ``self.__dataset_queue`` once for each RSE resolved for that report.

        Replicas for which ``touch_replica`` returns a false value (locked
        row) are sent back to the message queue as a ``get`` trace so they
        are retried later.

        Reports that cannot be handled (missing keys, wrong types, ...) are
        logged, counted and skipped. Nothing is returned.
        """
        replicas = []
        for report in self.__reports:
            if 'vo' not in report:
                report['vo'] = 'def'

            # RSE ids resolved for THIS report only. Keeping the list local to
            # the iteration guarantees that the parent datasets of a file are
            # never queued under the RSEs of a previously processed report.
            rse_ids = []

            try:
                # Identify suspicious files
                try:
                    if (self.__bad_files_patterns
                            and report['eventType'] in ['get_sm', 'get_sm_a', 'get']
                            and 'clientState' in report
                            and report['clientState'] not in ['DONE', 'FOUND_ROOT', 'ALREADY_DONE']):
                        for pattern in self.__bad_files_patterns:
                            if ('stateReason' in report
                                    and report['stateReason']
                                    and isinstance(report['stateReason'], str)
                                    and pattern.match(report['stateReason'])):
                                reason = report['stateReason'][:255]
                                if 'url' not in report or not report['url']:
                                    self.__logger(
                                        logging.ERROR,
                                        'Missing url in the following trace : '
                                        + str(report))
                                else:
                                    try:
                                        surl = report['url']
                                        declare_bad_file_replicas(
                                            [surl, ],
                                            reason=reason,
                                            issuer=InternalAccount('root', vo=report['vo']),
                                            status=BadFilesStatus.SUSPICIOUS)
                                        self.__logger(
                                            logging.INFO,
                                            'Declare suspicious file %s with reason %s'
                                            % (report['url'], reason))
                                    except Exception as error:
                                        self.__logger(
                                            logging.ERROR,
                                            'Failed to declare suspicious file'
                                            + str(error))
                except Exception as error:
                    # A failure while flagging suspicious files must not
                    # prevent the access time update of the same report.
                    self.__logger(
                        logging.ERROR,
                        'Problem with bad trace : %s . Error %s' %
                        (str(report), str(error)))

                # check if scope in report. if not skip this one.
                # (touch reports may legitimately come without a file scope)
                if 'scope' not in report:
                    record_counter('daemons.tracer.kronos.missing_scope')
                    if report['eventType'] != 'touch':
                        continue
                else:
                    record_counter('daemons.tracer.kronos.with_scope')
                    report['scope'] = InternalScope(report['scope'],
                                                    report['vo'])

                event_type = report['eventType']

                # handle all events starting with get* and download and touch events.
                if not event_type.startswith(('get', 'sm_get')) and event_type not in ('download', 'touch'):
                    continue
                if event_type.endswith('_es'):
                    continue
                record_counter('daemons.tracer.kronos.total_get')
                if event_type == 'get':
                    record_counter('daemons.tracer.kronos.dq2clients')
                elif event_type in ('get_sm', 'sm_get'):
                    if report['eventVersion'] == 'aCT':
                        record_counter('daemons.tracer.kronos.panda_production_act')
                    else:
                        record_counter('daemons.tracer.kronos.panda_production')
                elif event_type in ('get_sm_a', 'sm_get_a'):
                    if report['eventVersion'] == 'aCT':
                        record_counter('daemons.tracer.kronos.panda_analysis_act')
                    else:
                        record_counter('daemons.tracer.kronos.panda_analysis')
                elif event_type == 'download':
                    record_counter('daemons.tracer.kronos.rucio_download')
                elif event_type == 'touch':
                    record_counter('daemons.tracer.kronos.rucio_touch')
                else:
                    record_counter('daemons.tracer.kronos.other_get')

                # download and touch traces identify the user by account.
                if event_type in ('download', 'touch'):
                    report['usrdn'] = report['account']

                if report['usrdn'] in self.__excluded_usrdns:
                    continue

                # handle touch and non-touch traces differently
                if event_type != 'touch':
                    # check if the report has the right state.
                    if 'eventVersion' in report:
                        if report['eventVersion'] != 'aCT':
                            if report['clientState'] in self.__excluded_states:
                                continue

                    # a missing or empty remoteSite cannot be mapped to a replica
                    if not report.get('remoteSite'):
                        continue

                    if 'filename' not in report:
                        if 'name' in report:
                            report['filename'] = report['name']

                    # remoteSite may hold a comma separated list of RSE names
                    for rse in report['remoteSite'].strip().split(','):
                        try:
                            rse_id = get_rse_id(rse=rse, vo=report['vo'])
                        except RSENotFound:
                            self.__logger(
                                logging.WARNING,
                                "Cannot lookup rse_id for %s. Will skip this report.",
                                rse)
                            record_counter('daemons.tracer.kronos.rse_not_found')
                            continue
                        replicas.append({
                            'name': report['filename'],
                            'scope': report['scope'],
                            'rse': rse,
                            'rse_id': rse_id,
                            'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix']),
                            'traceTimeentryUnix': report['traceTimeentryUnix'],
                            'eventVersion': report['eventVersion']
                        })
                        rse_ids.append(rse_id)
                else:
                    # if touch event and if datasetScope is in the report then it means
                    # that there is no file scope/name and therefore only the dataset is
                    # put in the queue to be updated and the rest is skipped.
                    rse_id = None
                    rse = None
                    if 'remoteSite' in report:
                        rse = report['remoteSite']
                        try:
                            rse_id = get_rse_id(rse=rse, vo=report['vo'])
                        except RSENotFound:
                            self.__logger(logging.WARNING,
                                          "Cannot lookup rse_id for %s.", rse)
                            record_counter('daemons.tracer.kronos.rse_not_found')
                    if 'datasetScope' in report:
                        self.__dataset_queue.put({
                            'scope': InternalScope(report['datasetScope'], vo=report['vo']),
                            'name': report['dataset'],
                            'rse_id': rse_id,
                            'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix'])
                        })
                        continue
                    if 'remoteSite' not in report:
                        continue
                    replicas.append({
                        'name': report['filename'],
                        'scope': report['scope'],
                        'rse': rse,
                        'rse_id': rse_id,
                        'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix']),
                        # Both keys are read when a locked replica is put back
                        # on the queue; without them the resubmission raises
                        # KeyError and the rest of the batch is not touched.
                        'traceTimeentryUnix': report['traceTimeentryUnix'],
                        # touch traces are not required to carry eventVersion
                        'eventVersion': report.get('eventVersion')
                    })
                    if rse_id is not None:
                        rse_ids.append(rse_id)

            except (KeyError, AttributeError):
                self.__logger(logging.ERROR,
                              "Cannot handle report.",
                              exc_info=True)
                record_counter('daemons.tracer.kronos.report_error')
                continue
            except Exception:
                self.__logger(logging.ERROR, "Exception", exc_info=True)
                continue

            # Without a resolved RSE there is nothing to queue. Leaving here
            # also guarantees that 'scope', 'filename' and 'traceTimeentryUnix'
            # are present below: a replica entry was built from them.
            if not rse_ids:
                continue

            accessed_at = datetime.utcfromtimestamp(report['traceTimeentryUnix'])
            for did in list_parent_dids(report['scope'], report['filename']):
                if did['type'] != DIDType.DATASET:
                    continue
                # do not update _dis datasets
                if did['scope'].external == 'panda' and '_dis' in did['name']:
                    continue
                for rse_id in rse_ids:
                    self.__dataset_queue.put({
                        'scope': did['scope'],
                        'name': did['name'],
                        'did_type': did['type'],
                        'rse_id': rse_id,
                        'accessed_at': accessed_at
                    })

        if not replicas:
            return

        self.__logger(logging.DEBUG, "trying to update replicas: %s", replicas)

        try:
            start_time = time()
            for replica in replicas:
                # if touch replica hits a locked row put the trace back into queue for later retry
                if not touch_replica(replica):
                    resubmit = {
                        'filename': replica['name'],
                        'scope': replica['scope'].external,
                        'remoteSite': replica['rse'],
                        'traceTimeentryUnix': replica['traceTimeentryUnix'],
                        'eventType': 'get',
                        'usrdn': 'someuser',
                        'clientState': 'DONE',
                        'eventVersion': replica['eventVersion']
                    }
                    # 'def' is assumed by the consumer when no vo is given
                    if replica['scope'].vo != 'def':
                        resubmit['vo'] = replica['scope'].vo
                    self.__conn.send(body=jdumps(resubmit),
                                     destination=self.__queue,
                                     headers={
                                         'appversion': 'rucio',
                                         'resubmitted': '1'
                                     })
                    record_counter('daemons.tracer.kronos.sent_resubmitted')
                    self.__logger(logging.WARNING,
                                  'hit locked row, resubmitted to queue')
            record_timer('daemons.tracer.kronos.update_atime',
                         (time() - start_time) * 1000)
        except Exception:
            self.__logger(logging.ERROR,
                          "Cannot update replicas.",
                          exc_info=True)
            record_counter('daemons.tracer.kronos.update_error')

        self.__logger(logging.INFO, 'updated %d replica(s)' % len(replicas))
Exemple #3
0
    def __update_atime(self):
        """
        Bulk update atime.

        Processes every trace report buffered in ``self.__reports``:

        * ``get*``/``sm_get*``/``download``/``touch`` reports are converted
          into replica entries that are later handed to ``touch_replica``;
        * dataset-level ``touch`` reports (those carrying ``datasetScope``)
          only put the dataset on ``self.__dataset_queue``;
        * for file-level reports the parent datasets of the file are put on
          ``self.__dataset_queue`` once for each RSE named in that report.

        Replicas for which ``touch_replica`` returns a false value (locked
        row) are sent back to the message queue as a ``get`` trace so they
        are retried later.

        Reports with missing keys or unexpected types are logged, counted
        and skipped. Nothing is returned.
        """
        replicas = []
        for report in self.__reports:
            # RSEs of THIS report only. Resetting the list for every report
            # prevents the parent datasets of a touched file from being queued
            # under the RSEs of a previously processed report.
            rses = []
            try:
                # check if scope in report. if not skip this one.
                # (touch reports may legitimately come without a file scope)
                if 'scope' not in report:
                    record_counter('daemons.tracer.kronos.missing_scope')
                    if report['eventType'] != 'touch':
                        continue
                else:
                    record_counter('daemons.tracer.kronos.with_scope')

                event_type = report['eventType']

                # handle all events starting with get* and download and touch events.
                if not event_type.startswith(('get', 'sm_get')) and event_type not in ('download', 'touch'):
                    continue
                if event_type.endswith('_es'):
                    continue
                record_counter('daemons.tracer.kronos.total_get')
                if event_type == 'get':
                    record_counter('daemons.tracer.kronos.dq2clients')
                elif event_type in ('get_sm', 'sm_get'):
                    if report['eventVersion'] == 'aCT':
                        record_counter('daemons.tracer.kronos.panda_production_act')
                    else:
                        record_counter('daemons.tracer.kronos.panda_production')
                elif event_type in ('get_sm_a', 'sm_get_a'):
                    if report['eventVersion'] == 'aCT':
                        record_counter('daemons.tracer.kronos.panda_analysis_act')
                    else:
                        record_counter('daemons.tracer.kronos.panda_analysis')
                elif event_type == 'download':
                    record_counter('daemons.tracer.kronos.rucio_download')
                elif event_type == 'touch':
                    record_counter('daemons.tracer.kronos.rucio_touch')
                else:
                    record_counter('daemons.tracer.kronos.other_get')

                # download and touch traces identify the user by account.
                if event_type in ('download', 'touch'):
                    report['usrdn'] = report['account']

                if report['usrdn'] in self.__excluded_usrdns:
                    continue

                # handle touch and non-touch traces differently
                if event_type != 'touch':
                    # check if the report has the right state.
                    if 'eventVersion' in report:
                        if report['eventVersion'] != 'aCT':
                            if report['clientState'] in self.__excluded_states:
                                continue

                    # a missing or empty remoteSite cannot be mapped to a replica
                    if not report.get('remoteSite'):
                        continue

                    if 'filename' not in report:
                        if 'name' in report:
                            report['filename'] = report['name']

                    # remoteSite may hold a comma separated list of RSE names
                    rses = report['remoteSite'].strip().split(',')
                    for rse in rses:
                        replicas.append({
                            'name': report['filename'],
                            'scope': report['scope'],
                            'rse': rse,
                            'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix']),
                            'traceTimeentryUnix': report['traceTimeentryUnix'],
                            'eventVersion': report['eventVersion']
                        })
                else:
                    # if touch event and if datasetScope is in the report then it means
                    # that there is no file scope/name and therefore only the dataset is
                    # put in the queue to be updated and the rest is skipped.
                    if 'datasetScope' in report:
                        rse = None
                        if 'remoteSite' in report:
                            rse = report['remoteSite']
                        self.__dataset_queue.put({
                            'scope': report['datasetScope'],
                            'name': report['dataset'],
                            'rse': rse,
                            'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix'])
                        })
                        continue
                    if 'remoteSite' not in report:
                        continue
                    replicas.append({
                        'name': report['filename'],
                        'scope': report['scope'],
                        'rse': report['remoteSite'],
                        'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix']),
                        # Both keys are read when a locked replica is put back
                        # on the queue; without them the resubmission raises
                        # KeyError and the rest of the batch is not touched.
                        'traceTimeentryUnix': report['traceTimeentryUnix'],
                        # touch traces are not required to carry eventVersion
                        'eventVersion': report.get('eventVersion')
                    })
                    # the parent datasets are updated for the touched RSE
                    rses = [report['remoteSite']]

            except (KeyError, AttributeError):
                logging.error(format_exc())
                record_counter('daemons.tracer.kronos.report_error')
                continue

            # Reaching this point means a replica entry was built, hence
            # 'scope', 'filename' and 'traceTimeentryUnix' are available.
            accessed_at = datetime.utcfromtimestamp(report['traceTimeentryUnix'])
            for did in list_parent_dids(report['scope'], report['filename']):
                if did['type'] != DIDType.DATASET:
                    continue
                # do not update _dis datasets
                if did['scope'] == 'panda' and '_dis' in did['name']:
                    continue
                for rse in rses:
                    self.__dataset_queue.put({
                        'scope': did['scope'],
                        'name': did['name'],
                        'did_type': did['type'],
                        'rse': rse,
                        'accessed_at': accessed_at
                    })

        logging.debug(replicas)

        try:
            ts = time()
            for replica in replicas:
                # if touch replica hits a locked row put the trace back into queue for later retry
                if not touch_replica(replica):
                    resubmit = {
                        'filename': replica['name'],
                        'scope': replica['scope'],
                        'remoteSite': replica['rse'],
                        'traceTimeentryUnix': replica['traceTimeentryUnix'],
                        'eventType': 'get',
                        'usrdn': 'someuser',
                        'clientState': 'DONE',
                        'eventVersion': replica['eventVersion']
                    }
                    self.__conn.send(body=jdumps(resubmit),
                                     destination=self.__queue,
                                     headers={
                                         'appversion': 'rucio',
                                         'resubmitted': '1'
                                     })
                    record_counter('daemons.tracer.kronos.sent_resubmitted')
                    logging.warning(
                        '(kronos_file) hit locked row, resubmitted to queue')
            record_timer('daemons.tracer.kronos.update_atime',
                         (time() - ts) * 1000)
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt still stop the daemon.
            logging.error(format_exc())
            record_counter('daemons.tracer.kronos.update_error')

        logging.info('(kronos_file) updated %d replicas' % len(replicas))
Exemple #4
0
            None,
            get_replica_atime({
                'scope': files1[0]['scope'],
                'name': files1[0]['name'],
                'rse': 'MOCK'
            }))
        assert_equal(None,
                     get_did_atime(scope=tmp_scope, name=files1[0]['name']))

        for r in [{
                'scope': files1[0]['scope'],
                'name': files1[0]['name'],
                'rse': 'MOCK',
                'accessed_at': now
        }]:
            touch_replica(r)

        assert_equal(
            now,
            get_replica_atime({
                'scope': files1[0]['scope'],
                'name': files1[0]['name'],
                'rse': 'MOCK'
            }))
        assert_equal(now, get_did_atime(scope=tmp_scope,
                                        name=files1[0]['name']))

        for i in range(1, nbfiles):
            assert_equal(
                None,
                get_replica_atime({