def test_real_sample(self):
        '''
        Check remove_prefix() against real storage paths: three located
        under rucio/ (regular, user and group scopes) and one outside of it.
        '''
        prefix = components('/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/')

        # Each entry: (parsed absolute path, expected relative path, message
        # shown by nose when the comparison fails).
        samples = [
            (
                components(
                    '/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/rucio/group10/perf-jets/02/1a/group10.perf-jets.data12_8TeV.periodI.physics_HadDelayed.jmr.2015.01.29.v01.log.4770484.000565.log.tgz'
                ),
                'rucio/group10/perf-jets/02/1a/group10.perf-jets.data12_8TeV.periodI.physics_HadDelayed.jmr.2015.01.29.v01.log.4770484.000565.log.tgz',
                'Normal path inside directory rucio/',
            ),
            (
                components(
                    '/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/rucio/user/zxi/fd/73/user.zxi.361100.PowhegPythia8EvtGen.DAOD_TOPQ1.e3601_s2576_s2132_r6630_r6264_p2363.08-12-15.log.6249615.000015.log.tgz'
                ),
                'rucio/user/zxi/fd/73/user.zxi.361100.PowhegPythia8EvtGen.DAOD_TOPQ1.e3601_s2576_s2132_r6630_r6264_p2363.08-12-15.log.6249615.000015.log.tgz',
                'User path inside rucio/',
            ),
            (
                components(
                    '/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/rucio/group/det-ibl/00/5d/group.det-ibl.6044653.BTAGSTREAM._000014.root'
                ),
                'rucio/group/det-ibl/00/5d/group.det-ibl.6044653.BTAGSTREAM._000014.root',
                'Group path inside rucio/',
            ),
            (
                components(
                    '/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/SAM/testfile17-GET-ATLASSCRATCHDISK'
                ),
                'SAM/testfile17-GET-ATLASSCRATCHDISK',
                'SAM path (outside rucio/)',
            ),
        ]

        for parsed_path, expected, description in samples:
            stripped = remove_prefix(prefix, parsed_path)
            nose.tools.eq_('/'.join(stripped), expected, description)
Beispiel #2
0
        def strip_storage_dump(line):
            '''
            Normalise one storage dump line to the path layout used by the
            Rucio Replica Dumps.

            The storage prefix is removed and, when the remaining path starts
            with a 'rucio' component, that component is dropped as well.

            :param line: String with one line of a dump.
            :returns: Path formated as in the Rucio Replica Dumps.
            '''
            line_components = path_parsing.components(line)
            tail = path_parsing.remove_prefix(prefix_components, line_components)
            # Skip the leading 'rucio' directory, if any, so that the result
            # is relative to it.
            first_kept = 1 if tail[0] == 'rucio' else 0
            return '/'.join(tail[first_kept:])
Beispiel #3
0
    def dump(cls,
             subcommand,
             ddm_endpoint,
             storage_dump,
             prev_date_fname=None,
             next_date_fname=None,
             prev_date=None,
             next_date=None,
             sort_rucio_replica_dumps=True,
             date=None,
             cache_dir=DUMPS_CACHE_DIR):
        '''
        Compare a storage dump with two Rucio replica dumps and generate the
        inconsistencies found.

        :param subcommand: Either 'consistency' (the two replica dumps are
            downloaded for `prev_date` and `next_date`) or
            'consistency-manual' (the files given in `prev_date_fname` and
            `next_date_fname` are used as they are).
        :param ddm_endpoint: Name of the DDMEndpoint being checked.
        :param storage_dump: Path to the storage dump file.
        :param prev_date_fname: Path to the earlier replica dump; overwritten
            when subcommand is 'consistency'.
        :param next_date_fname: Path to the later replica dump; overwritten
            when subcommand is 'consistency'.
        :param prev_date: Date passed to the download of the earlier replica
            dump (only used by 'consistency').
        :param next_date: Date passed to the download of the later replica
            dump (only used by 'consistency').
        :param sort_rucio_replica_dumps: If true the parsed replica dumps are
            sorted with gnu_sort(), otherwise they are used in their
            original order.
        :param date: Optional object providing strftime(); only used to name
            the parsed storage dump when `storage_dump` does not end with the
            standard dump file name.
        :param cache_dir: Directory handed to the download, parsing and
            sorting helpers.
        :returns: Generator of cls('LOST', path) and cls('DARK', path)
            instances.
        '''
        logger = logging.getLogger('auditor.consistency')
        if subcommand == 'consistency':
            prev_date_fname = data_models.Replica.download(ddm_endpoint,
                                                           prev_date,
                                                           cache_dir=cache_dir)
            next_date_fname = data_models.Replica.download(ddm_endpoint,
                                                           next_date,
                                                           cache_dir=cache_dir)
            assert prev_date_fname is not None
            assert next_date_fname is not None
        else:
            # NOTE(review): in manual mode the two file names are not
            # validated here; presumably the caller guarantees them.
            assert subcommand == 'consistency-manual'

        prefix = path_parsing.prefix(
            dumper.agis_endpoints_data(),
            ddm_endpoint,
        )
        prefix_components = path_parsing.components(prefix)

        def parser(line):
            '''
            Simple parser for Rucio replica dumps.

            :param line: String with one line of a dump.
            :returns: A string with the path and status of the replica
                      separated by a comma.
            '''
            fields = line.split('\t')
            path = fields[6].strip().lstrip('/')
            status = fields[8].strip()

            return ','.join((path, status))

        def strip_storage_dump(line):
            '''
            Parser to have consistent paths in storage dumps.

            :param line: String with one line of a dump.
            :returns: Path formated as in the Rucio Replica Dumps.
            '''
            relative = path_parsing.remove_prefix(
                prefix_components,
                path_parsing.components(line),
            )
            # NOTE(review): relative[0] raises IndexError if nothing is left
            # after removing the prefix (e.g. a blank line) -- confirm
            # whether such lines can reach this parser.
            if relative[0] == 'rucio':
                relative = relative[1:]
            return '/'.join(relative)

        def prepare_replica_dump(fname):
            '''
            Parse one replica dump and, if requested, sort it by path.

            :param fname: Path to the replica dump.
            :returns: Whatever parse_and_filter_file()/gnu_sort() return for
                      the processed dump (used below as a file name).
            '''
            parsed = parse_and_filter_file(
                fname,
                parser=parser,
                cache_dir=cache_dir,
            )
            if not sort_rucio_replica_dumps:
                return parsed
            return gnu_sort(
                parsed,
                delimiter=',',
                fieldspec='1',
                cache_dir=cache_dir,
            )

        prev_date_fname_sorted = prepare_replica_dump(prev_date_fname)
        next_date_fname_sorted = prepare_replica_dump(next_date_fname)

        # The endpoint name is escaped so that any regex metacharacter in it
        # is matched literally.
        standard_name_re = r'(ddmendpoint_{0}_\d{{2}}-\d{{2}}-\d{{4}}_[0-9a-f]{{40}})$'.format(
            re.escape(ddm_endpoint))
        standard_name_match = re.search(standard_name_re, storage_dump)
        if standard_name_match is not None:
            # If the original filename was generated using the expected format,
            # just use the name as prefix for the parsed file.
            sd_prefix = standard_name_match.group(0)
        elif date is not None:
            # Otherwise try to use the date information and DDMEndpoint name to
            # have a meaningful filename.
            sd_prefix = 'ddmendpoint_{0}_{1}'.format(
                ddm_endpoint,
                date.strftime('%d-%m-%Y'),
            )
        else:
            # As last resort use only the DDMEndpoint name, but this is error
            # prone as old dumps may interfere with the checks.
            sd_prefix = 'ddmendpoint_{0}_unknown_date'.format(ddm_endpoint)
            logger.warning(
                'Using basic and error prone naming for RSE dump as no date '
                'information was provided, %s dump will be named %s',
                ddm_endpoint,
                sd_prefix,
            )

        storage_dump_fname_sorted = gnu_sort(
            parse_and_filter_file(
                storage_dump,
                parser=strip_storage_dump,
                prefix=sd_prefix,
                cache_dir=cache_dir,
            ),
            prefix=sd_prefix,
            cache_dir=cache_dir,
        )

        with open(prev_date_fname_sorted) as prevf:
            with open(next_date_fname_sorted) as nextf:
                with open(storage_dump_fname_sorted) as sdump:
                    for path, where, status in compare3(prevf, sdump, nextf):
                        prevstatus, nextstatus = status
                        # Presumably `where` follows the argument order of
                        # compare3(): previous dump, storage dump, next dump.
                        in_prev, in_storage, in_next = where[0], where[1], where[2]

                        # In both replica dumps with status 'A' but not in
                        # the storage dump.
                        if in_prev and not in_storage and in_next:
                            if prevstatus == 'A' and nextstatus == 'A':
                                yield cls('LOST', path)

                        # Only in the storage dump.
                        if not in_prev and in_storage and not in_next:
                            yield cls('DARK', path)