def test_real_sample(self):
    prefix = components('/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/')
    path_regular = components(
        '/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/rucio/group10/perf-jets/02/1a/group10.perf-jets.data12_8TeV.periodI.physics_HadDelayed.jmr.2015.01.29.v01.log.4770484.000565.log.tgz'
    )
    path_user = components(
        '/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/rucio/user/zxi/fd/73/user.zxi.361100.PowhegPythia8EvtGen.DAOD_TOPQ1.e3601_s2576_s2132_r6630_r6264_p2363.08-12-15.log.6249615.000015.log.tgz'
    )
    path_group = components(
        '/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/rucio/group/det-ibl/00/5d/group.det-ibl.6044653.BTAGSTREAM._000014.root'
    )
    path_sam = components(
        '/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/SAM/testfile17-GET-ATLASSCRATCHDISK'
    )

    nose.tools.eq_(
        '/'.join(remove_prefix(prefix, path_regular)),
        'rucio/group10/perf-jets/02/1a/group10.perf-jets.data12_8TeV.periodI.physics_HadDelayed.jmr.2015.01.29.v01.log.4770484.000565.log.tgz',
        'Normal path inside directory rucio/',
    )
    nose.tools.eq_(
        '/'.join(remove_prefix(prefix, path_user)),
        'rucio/user/zxi/fd/73/user.zxi.361100.PowhegPythia8EvtGen.DAOD_TOPQ1.e3601_s2576_s2132_r6630_r6264_p2363.08-12-15.log.6249615.000015.log.tgz',
        'User path inside rucio/',
    )
    nose.tools.eq_(
        '/'.join(remove_prefix(prefix, path_group)),
        'rucio/group/det-ibl/00/5d/group.det-ibl.6044653.BTAGSTREAM._000014.root',
        'Group path inside rucio/',
    )
    nose.tools.eq_(
        '/'.join(remove_prefix(prefix, path_sam)),
        'SAM/testfile17-GET-ATLASSCRATCHDISK',
        'SAM path (outside rucio/)',
    )
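
# Illustrative sketch (not part of the tested module): the helpers exercised in
# test_real_sample() are assumed to behave roughly like the stand-ins below,
# i.e. components() splits a path into its non-empty segments and
# remove_prefix() drops a leading list of components when present. The
# underscore-prefixed names are hypothetical and exist only for illustration;
# the real implementations live in the path-parsing module under test.

def _components_sketch(path):
    # Split on '/' and drop empty segments produced by leading/trailing slashes.
    return [segment for segment in path.split('/') if segment != '']

def _remove_prefix_sketch(prefix, path):
    # Strip the prefix components only when the path actually starts with them.
    if path[:len(prefix)] == prefix:
        return path[len(prefix):]
    return path

# Mirrors the SAM case above: the endpoint prefix is removed, the rest is kept.
_prefix = _components_sketch('/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/')
_path = _components_sketch('/pnfs/grid.sara.nl/data/atlas/atlasscratchdisk/SAM/testfile17-GET-ATLASSCRATCHDISK')
assert '/'.join(_remove_prefix_sketch(_prefix, _path)) == 'SAM/testfile17-GET-ATLASSCRATCHDISK'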
def dump(cls, subcommand, ddm_endpoint, storage_dump, prev_date_fname=None,
         next_date_fname=None, prev_date=None, next_date=None,
         sort_rucio_replica_dumps=True, date=None, cache_dir=DUMPS_CACHE_DIR):
    logger = logging.getLogger('auditor.consistency')

    if subcommand == 'consistency':
        # Regular mode: download the Rucio replica dumps for the dates
        # surrounding the storage dump.
        prev_date_fname = data_models.Replica.download(ddm_endpoint, prev_date, cache_dir=cache_dir)
        next_date_fname = data_models.Replica.download(ddm_endpoint, next_date, cache_dir=cache_dir)
        assert prev_date_fname is not None
        assert next_date_fname is not None
    else:
        # Manual mode: the replica dump filenames must be supplied by the caller.
        assert subcommand == 'consistency-manual'

    prefix = path_parsing.prefix(
        dumper.agis_endpoints_data(),
        ddm_endpoint,
    )
    prefix_components = path_parsing.components(prefix)

    def parser(line):
        '''
        Simple parser for Rucio replica dumps.

        :param line: String with one line of a dump.
        :returns: A string with the path and status of the replica,
                  joined by a comma.
        '''
        fields = line.split('\t')
        path = fields[6].strip().lstrip('/')
        status = fields[8].strip()
        return ','.join((path, status))

    def strip_storage_dump(line):
        '''
        Parser to have consistent paths in storage dumps.

        :param line: String with one line of a dump.
        :returns: Path formatted as in the Rucio Replica Dumps.
        '''
        relative = path_parsing.remove_prefix(
            prefix_components,
            path_parsing.components(line),
        )
        if relative[0] == 'rucio':
            relative = relative[1:]
        return '/'.join(relative)

    if sort_rucio_replica_dumps:
        prev_date_fname_sorted = gnu_sort(
            parse_and_filter_file(prev_date_fname, parser=parser, cache_dir=cache_dir),
            delimiter=',',
            fieldspec='1',
            cache_dir=cache_dir,
        )
        next_date_fname_sorted = gnu_sort(
            parse_and_filter_file(next_date_fname, parser=parser, cache_dir=cache_dir),
            delimiter=',',
            fieldspec='1',
            cache_dir=cache_dir,
        )
    else:
        prev_date_fname_sorted = parse_and_filter_file(
            prev_date_fname,
            parser=parser,
            cache_dir=cache_dir,
        )
        next_date_fname_sorted = parse_and_filter_file(
            next_date_fname,
            parser=parser,
            cache_dir=cache_dir,
        )

    standard_name_re = r'(ddmendpoint_{0}_\d{{2}}-\d{{2}}-\d{{4}}_[0-9a-f]{{40}})$'.format(
        ddm_endpoint)
    standard_name_match = re.search(standard_name_re, storage_dump)
    if standard_name_match is not None:
        # If the original filename was generated using the expected format,
        # just use the name as prefix for the parsed file.
        sd_prefix = standard_name_match.group(0)
    elif date is not None:
        # Otherwise try to use the date information and DDMEndpoint name to
        # have a meaningful filename.
        sd_prefix = 'ddmendpoint_{0}_{1}'.format(
            ddm_endpoint,
            date.strftime('%d-%m-%Y'),
        )
    else:
        # As a last resort use only the DDMEndpoint name, but this is error
        # prone as old dumps may interfere with the checks.
        sd_prefix = 'ddmendpoint_{0}_unknown_date'.format(ddm_endpoint)
        logger.warning(
            'Using basic and error prone naming for RSE dump as no date '
            'information was provided, %s dump will be named %s',
            ddm_endpoint,
            sd_prefix,
        )

    storage_dump_fname_sorted = gnu_sort(
        parse_and_filter_file(
            storage_dump,
            parser=strip_storage_dump,
            prefix=sd_prefix,
            cache_dir=cache_dir,
        ),
        prefix=sd_prefix,
        cache_dir=cache_dir,
    )

    with open(prev_date_fname_sorted) as prevf:
        with open(next_date_fname_sorted) as nextf:
            with open(storage_dump_fname_sorted) as sdump:
                for path, where, status in compare3(prevf, sdump, nextf):
                    prevstatus, nextstatus = status
                    # LOST: registered and available in Rucio on both dates,
                    # but missing from the storage dump.
                    if where[0] and not where[1] and where[2]:
                        if prevstatus == 'A' and nextstatus == 'A':
                            yield cls('LOST', path)
                    # DARK: present on storage but unknown to Rucio on either date.
                    if not where[0] and where[1] and not where[2]:
                        yield cls('DARK', path)
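
# Illustrative sketch (not part of this module): the LOST/DARK decision above
# assumes that compare3() walks three sorted inputs (previous replica dump,
# storage dump, next replica dump) and reports, for each path, in which of the
# three it appears plus the replica statuses. The helper below is a simplified,
# set-based stand-in that reproduces that classification for small in-memory
# examples; the real pipeline streams over sorted files instead. All names here
# are hypothetical.

def _classify_sketch(prev_replicas, storage_files, next_replicas):
    '''prev_replicas/next_replicas: dict mapping path -> status ('A' = available).
    storage_files: iterable of paths found on storage. Yields (kind, path).'''
    storage = set(storage_files)
    for path in set(prev_replicas) | storage | set(next_replicas):
        where = (path in prev_replicas, path in storage, path in next_replicas)
        if where == (True, False, True):
            # Known to Rucio before and after the storage dump, but absent on storage.
            if prev_replicas[path] == 'A' and next_replicas[path] == 'A':
                yield ('LOST', path)
        elif where == (False, True, False):
            # Present on storage but unknown to Rucio on both dates.
            yield ('DARK', path)

# Example: 'data/f1' is lost, 'data/orphan' is dark, 'data/f2' is consistent.
assert sorted(_classify_sketch(
    {'data/f1': 'A', 'data/f2': 'A'},
    ['data/f2', 'data/orphan'],
    {'data/f1': 'A', 'data/f2': 'A'},
)) == [('DARK', 'data/orphan'), ('LOST', 'data/f1')]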