Beispiel #1
0
def pool_insert(files, catalog_name="xmlcatalog_file:PoolFileCatalog.xml"):
    print ":: inserting [%i] files into pool catalog... (%s)" % (len(files),
                                                                 catalog_name)
    import os, sys
    import commands
    sc, exe = commands.getstatusoutput('which pool_insertFileToCatalog')
    if sc != 0:
        print ":: could not find 'pool_insertFileToCatalog' !"
        print exe
        return 1

    import PyUtils.Helpers as H
    with H.restricted_ldenviron(projects=('AtlasCore', )):
        os.environ['POOL_CATALOG'] = catalog_name
        cmd = "%s %s" % (exe, " ".join(files))
        sc, out = commands.getstatusoutput(cmd)

    out = os.linesep.join([
        o for o in out.splitlines() if not (
            o.startswith(
                "Warning in <TClass::TClass>: no dictionary for class ") or
            o.startswith('Warning in <TEnvRec::ChangeValue>: duplicate entry'))
    ])

    if sc != 0:
        print ":: problem running pool_insertFileToCatalog:"
        print out
        return 2

    print out
    print ":: inserting [%i] files into pool catalog... [done]" % len(files)
    return sc
def pool_extract(files):
    print ":: extracting GUID for [%i] files... " % len(files)
    import os, sys
    import commands
    sc, exe = commands.getstatusoutput('which pool_extractFileIdentifier')
    if sc != 0:
        print ":: could not find 'pool_extractFileIdentifier' !"
        print exe
        return 1

    import PyUtils.Helpers as H
    with H.restricted_ldenviron(projects=('AtlasCore', )):

        cmd = "%s %s" % (exe, " ".join(files))
        sc, out = commands.getstatusoutput(cmd)

    out = os.linesep.join([
        o for o in out.splitlines() if not (
            o.startswith(
                "Warning in <TClass::TClass>: no dictionary for class ") or
            o.startswith('Warning in <TEnvRec::ChangeValue>: duplicate entry'))
    ])

    if sc != 0:
        print ":: problem running pool_extractFileIdentifier:"
        print out
        return sc

    print out
    print ":: extracting GUID for [%i] files... [done]" % len(files)
    return sc
Beispiel #3
0
    def _root_open(self, fname, raw=True):
        """Open `fname` read-only through ROOT and return the file object.

        ``TFile.Open`` is used unless ``self.fname(fname)`` reports the
        ``https`` protocol, in which case ``_setup_ssl`` is called and
        ``TWebFile.Open`` is used instead.  When `raw` is true a
        ``filetype=raw`` parameter is appended to the name.

        Raises IOError(errno.ENOENT, ...) if the opened object is falsy.
        """
        import re
        root = self.pyroot
        quiet = [
            re.compile('TClass::TClass:0: RuntimeWarning: no dictionary for class.*'),
        ]
        with H.ShutUp(filters=quiet):
            root.gSystem.Load('libRootCollection')
            opener = root.TFile.Open

            # the protocol matters because secure-http files have to be
            # opened as TWebFiles rather than plain TFiles
            protocol, _ = self.fname(fname)
            over_https = (protocol == 'https')
            if over_https:
                _setup_ssl(self.msg(), root)
                opener = root.TWebFile.Open

            target = fname
            if raw:
                # an https URL that already has parameters gets the file
                # type appended with '&'; everything else gets '?'
                separator = '&' if (over_https and '?' in fname) else '?'
                target = fname + separator + 'filetype=raw'

            handle = opener(target, 'READ')
            if not handle:
                raise IOError(errno.ENOENT,
                              'No such file or directory', fname)
            return handle
        return
Beispiel #4
0
    def __call__(self, fname, evtmax):
        """Return ``self._process_call(fname, evtmax, projects=None)``.

        The call is made inside ``H.ShutUp`` with a single catch-all
        ('.*') filter.
        """
        import re
        import PyUtils.Helpers as H

        catch_all = [re.compile('.*')]
        with H.ShutUp(filters=catch_all):
            result = self._process_call(fname, evtmax, projects=None)

        return result
def pool_insert(files, catalog_name="xmlcatalog_file:PoolFileCatalog.xml"):
    print ":: inserting [%i] files into pool catalog... (%s)"%(
        len (files),
        catalog_name
        )
    import os, sys
    import commands
    sc,exe = commands.getstatusoutput ('which pool_insertFileToCatalog')
    if sc != 0:
        print ":: could not find 'pool_insertFileToCatalog' !"
        print exe
        return 1

    import PyUtils.Helpers as H
    with H.restricted_ldenviron(projects=('AtlasCore',)):
        os.environ['POOL_CATALOG'] = catalog_name
        cmd = "%s %s" % (exe, " ".join(files))
        sc, out = commands.getstatusoutput (cmd)
        
    out = os.linesep.join(
        [o for o in out.splitlines()
         if not (o.startswith("Warning in <TClass::TClass>: no dictionary for class ") or
                 o.startswith('Warning in <TEnvRec::ChangeValue>: duplicate entry'))]
        )

    if sc != 0:
        print ":: problem running pool_insertFileToCatalog:"
        print out
        return 2

    print out
    print ":: inserting [%i] files into pool catalog... [done]"%len(files)
    return sc
Beispiel #6
0
class PoolFile(object):
    """
    A simple class to retrieve informations about the content of a POOL file.
    It should be abstracted from the underlying technology used to create this
    POOL file (Db, ROOT,...).
    Right now, we are using the easy and loosy solution: going straight to the
    ROOT 'API'.
    """
    
    def __init__(self, fileName, verbose=True):
        object.__init__(self)

        self._fileInfos = None
        self.keys       = None
        self.dataHeader = PoolRecord("DataHeader", 0, 0, 0,
                                     nEntries = 0,
                                     dirType = "T")
        self.data       = []
        self.verbose = verbose

        # get the "final" file name (handles all kind of protocols)
        try:
            import PyUtils.AthFile as af
            protocol, fileName = af.server.fname(fileName)
        except Exception,err:
            print "## warning: problem opening PoolFileCatalog:\n%s"%err
            import traceback
            traceback.print_exc(err)
            pass
        
        self.poolFile = None
        dbFileName = whichdb.whichdb( fileName )
        if not dbFileName in ( None, '' ):
            if self.verbose==True:
                print "## opening file [%s]..." % str(fileName)
            db = shelve.open( fileName, 'r' )
            if self.verbose==True:
                print "## opening file [OK]"
            report = db['report']
            self._fileInfos = report['fileInfos']
            self.dataHeader = report['dataHeader']
            self.data       = report['data']
        else:
            import PyUtils.Helpers as _H
            projects = 'AtlasCore' if PoolOpts.FAST_MODE else None
            with _H.restricted_ldenviron (projects=projects):
                if self.verbose==True:
                    print "## opening file [%s]..." % str(fileName)
                self.__openPoolFile( fileName )
                if self.verbose==True:
                    print "## opening file [OK]"
                self.__processFile()
            
        return
 def get_runs_from_tagfile(self, fname):
     """Return the run numbers found in the TAG file `fname`.

     Results are memoised in ``self.file_cache``.  A file whose
     ``stream_names`` (as reported by AthFile) does not contain 'TAG', or
     which ROOT fails to open, yields an empty list and is not cached.
     Otherwise every entry of 'POOLCollectionTree' is read and the attribute
     named by ``self.run_attr_name`` is collected into a set, which is
     cached and returned.
     """
     # check own cache for this file (direct membership test, no key list)
     if fname in self.file_cache:
         return self.file_cache[fname]
     # check file type with AthFile - this should avoid reopening files more times than necessary
     msg.debug("Checking file %s" % fname)
     import PyUtils.AthFile as athFile
     fileinfos = athFile.fopen(fname).fileinfos
     if 'TAG' not in fileinfos['stream_names']:
         return []
     # this is a TAG file, open it and read all run numbers
     # fileinfos have only the run number from the first TAG
     import PyUtils.Helpers as H
     with H.restricted_ldenviron(projects=['AtlasCore']):
         import re
         with H.ShutUp(filters=[
                 re.compile(
                     'TClass::TClass:0: RuntimeWarning: no dictionary for.*'
                 ),
                 re.compile('.*duplicate entry.*')
         ]):
             msg.debug("Opening TAG file %s" % fname)
             import PyUtils.RootUtils as ru
             f = ru.import_root().TFile.Open(fname, "read")
             if f is None or not f:
                 msg.warning("Failed to open TAG file %s" % fname)
                 return []
             run_numbers = set()
             try:
                 coll_tree = f.Get('POOLCollectionTree')
                 if coll_tree is not None:
                     for row in xrange(0, coll_tree.GetEntries()):
                         coll_tree.GetEntry(row)
                         run_numbers.add(
                             getattr(coll_tree, self.run_attr_name))
                     del coll_tree
             finally:
                 # close the file even if reading the tree raised
                 f.Close()
                 del f
             self.file_cache[fname] = run_numbers
             msg.info("TAG file: %s, found runs: %s" %
                      (fname, str(run_numbers)))
             return run_numbers
def _pythonize_tfile():
    """Attach ``read``, ``seek`` and ``tell`` methods to ``root.TFile``.

    The "RootUtilsPyROOTDict" dictionary is loaded through PyCintex and the
    helpers ``_pythonize_read_root_file`` / ``_pythonize_tell_root_file`` are
    fetched from ``root.RootUtils``; ``read`` and ``tell`` delegate to them,
    while ``seek`` is an alias of ``TFile.Seek``.
    """
    import PyCintex
    PyCintex.Cintex.Enable()
    root = import_root()
    import PyUtils.Helpers as H

    quiet = [
        re.compile(
            'TClass::TClass:0: RuntimeWarning: no dictionary for.*'),
        re.compile('Warning in <TEnvRec::ChangeValue>: duplicate entry.*'),
    ]
    with H.ShutUp(filters=quiet):
        PyCintex.loadDict("RootUtilsPyROOTDict")
        rootutils = root.RootUtils
        # looked up exactly as before although the value itself is not used
        pybytes = rootutils.PyBytes
        read_root_file = rootutils._pythonize_read_root_file
        tell_root_file = rootutils._pythonize_tell_root_file

    def _read_chunk(tfile, nbytes):
        # one call to the C++ helper; None signals "nothing was read"
        c_buf = read_root_file(tfile, nbytes)
        if not (c_buf and c_buf.sz):
            return None
        payload = c_buf.buffer()
        payload.SetSize(c_buf.sz)
        return str(payload[:])

    def read(self, size=-1):
        """read([size]) -> read at most size bytes, returned as a string.

        If the size argument is negative or omitted, read until EOF is reached.
        Notice that when in non-blocking mode, less data than what was requested
        may be returned, even if no size parameter was given.

        FIXME: probably doesn't follow python file-like conventions...
        """
        if size >= 0:
            data = _read_chunk(self, size)
            if data is None:
                return ''
            return data

        # negative size: keep pulling fixed-size chunks until one comes back empty
        chunk_size = 4096
        pieces = []
        data = _read_chunk(self, chunk_size)
        while data is not None:
            pieces.append(data)
            data = _read_chunk(self, chunk_size)
        return ''.join(pieces)

    root.TFile.read = read
    del read

    root.TFile.seek = root.TFile.Seek
    root.TFile.tell = lambda self: tell_root_file(self)
    return
Beispiel #9
0
def main(args):
    """Check that two ROOT files have the same content (containers and sizes).

    `args` is an options object; the attributes read here are ``old``,
    ``new``, ``tree_name``, ``entries``, ``ignore_leaves``,
    ``enforce_leaves``, ``known_hacks``, ``mode`` and ``error_mode``.
    It is also stored in the module-level ``g_args``.

    Both files are wrapped in ``RootFileDumper`` objects and their dumps are
    compared entry by entry in the nested ``diff_tree``.

    Returns 0 when no difference was counted, 2 otherwise.
    """
    global g_args
    g_args = args

    import PyUtils.RootUtils as ru
    root = ru.import_root()

    import PyUtils.Logging as L
    msg = L.logging.getLogger('diff-root')
    msg.setLevel(L.logging.INFO)

    from PyUtils.Helpers import ShutUp, ROOT6Setup
    ROOT6Setup()

    # an empty string is treated like -1; diff_tree() then compares over
    # the smaller of the two entry counts
    if args.entries == '':
        args.entries = -1

    msg.info('comparing tree [%s] in files:', args.tree_name)
    msg.info(' old: [%s]', args.old)
    msg.info(' new: [%s]', args.new)
    msg.info('ignore  leaves: %s', args.ignore_leaves)
    msg.info('enforce leaves: %s', args.enforce_leaves)
    msg.info('hacks:          %s', args.known_hacks)
    msg.info('entries:        %s', args.entries)
    msg.info('mode:           %s', args.mode)
    msg.info('error mode:     %s', args.error_mode)

    import PyUtils.Helpers as H
    with H.ShutUp():
        fold = ru.RootFileDumper(args.old, args.tree_name)
        fnew = ru.RootFileDumper(args.new, args.tree_name)
        pass

    def tree_infos(tree, args):
        """Return {'entries': count, 'leaves': set of branch names} for
        `tree`, leaving out names listed in ``args.ignore_leaves``."""
        nentries = tree.GetEntriesFast()
        # l.GetBranch().GetName() gives the full leaf path name
        leaves = [
            l.GetBranch().GetName() for l in tree.GetListOfLeaves()
            if l.GetBranch().GetName() not in args.ignore_leaves
        ]
        return {
            'entries': nentries,
            'leaves': set(leaves),
        }

    def diff_tree(fold, fnew, args):
        """Walk the dumps of `fold` and `fnew` in parallel and count the
        differing values.

        Returns the number of differences; 0.5 is added when either dumper
        ends up with ``allgood`` false, so that the result is non-zero.
        """
        infos = {
            'old': tree_infos(fold.tree, args),
            'new': tree_infos(fnew.tree, args),
        }

        nentries = min(infos['old']['entries'], infos['new']['entries'])
        itr_entries = nentries
        if args.entries in (-1, '', '-1'):
            #msg.info('comparing over [%s] entries...', nentries)
            itr_entries = nentries
            if infos['old']['entries'] != infos['new']['entries']:
                msg.info('different numbers of entries:')
                msg.info(' old: [%s]', infos['old']['entries'])
                msg.info(' new: [%s]', infos['new']['entries'])
                msg.info('=> comparing [%s] first entries...', nentries)
        else:
            # value supplied by the caller is passed through unchanged
            itr_entries = args.entries
            pass
        msg.info('comparing over [%s] entries...', itr_entries)

        # leaves present in only one of the two files are reported and
        # then excluded from the comparison
        old_leaves = infos['old']['leaves'] - infos['new']['leaves']
        if old_leaves:
            msg.warning('the following variables exist only in the old file !')
            for l in old_leaves:
                msg.warning(' - [%s]', l)
        new_leaves = infos['new']['leaves'] - infos['old']['leaves']
        if new_leaves:
            msg.warning('the following variables exist only in the new file !')
            for l in new_leaves:
                msg.warning(' - [%s]', l)
        skip_leaves = old_leaves | new_leaves | set(args.ignore_leaves)

        leaves = infos['old']['leaves'] & infos['new']['leaves']
        msg.info('comparing [%s] leaves over entries...', len(leaves))
        all_good = True
        n_good = 0
        n_bad = 0
        import collections
        from itertools import izip
        # leaf (or branch) name -> number of differences recorded for it
        summary = collections.defaultdict(int)

        old_dump_iter = fold.dump(args.tree_name, itr_entries)
        new_dump_iter = fnew.dump(args.tree_name, itr_entries)

        def leafname_fromdump(entry):
            """Join the non-numeric components of entry[2] with dots."""
            return '.'.join([s for s in entry[2] if not s.isdigit()])

        def reach_next(dump_iter, skip_leaves):
            """Return the next dump entry whose leaf name has no dotted
            prefix in `skip_leaves`, or None once the iterator is exhausted."""
            keep_reading = True
            while keep_reading:
                try:
                    entry = dump_iter.next()
                except StopIteration:
                    return None
                entry[2][0] = entry[2][0].rstrip('.\0')  # clean branch name
                name = []
                skip = False
                # test every dotted prefix of the leaf name against the
                # skip list
                for n in leafname_fromdump(entry).split('.'):
                    name.append(n)
                    if '.'.join(name) in skip_leaves:
                        skip = True
                        break
                if not skip:
                    return entry
                # print 'SKIP:', leafname_fromdump(entry)
            pass

        # read_old / read_new say which iterator is advanced on the next
        # pass; one of them is held back while re-synchronising
        read_old = True
        read_new = True
        d_old = None
        d_new = None

        while True:
            if read_old:
                prev_d_old = d_old
                d_old = reach_next(old_dump_iter, skip_leaves)
            if read_new:
                prev_d_new = d_new
                d_new = reach_next(new_dump_iter, skip_leaves)

            # both dumps exhausted
            if not d_new and not d_old:
                break

            read_old = True
            read_new = True
            if d_old == d_new:
                n_good += 1
                continue

            # a dump entry unpacks as (tree name, entry index, name path,
            # value); when both sides exist the 'new' values win for the
            # shared names tree_name / ientry / name
            if d_old:
                tree_name, ientry, name, iold = d_old
            if d_new:
                tree_name, ientry, name, inew = d_new

            # FIXME: that's a plain (temporary?) hack
            if name[-1] in args.known_hacks:
                continue

            n_bad += 1

            # in synch: everything but the value (last element) matches
            in_synch = d_old and d_new and d_old[:-1] == d_new[:-1]
            if not in_synch:
                if not _is_summary():
                    if d_old:
                        print '::sync-old %s' % '.'.join(["%03i" % ientry] +
                                                         map(str, d_old[2]))
                    else:
                        print '::sync-old ABSENT'
                    if d_new:
                        print '::sync-new %s' % '.'.join(["%03i" % ientry] +
                                                         map(str, d_new[2]))
                    else:
                        print '::sync-new ABSENT'
                    pass
                # remember for later
                if not d_old:
                    fold.allgood = False
                    summary[d_new[2][0]] += 1
                elif not d_new:
                    fnew.allgood = False
                    summary[d_old[2][0]] += 1
                else:
                    # compare "<entry>.<branch>" strings to decide which
                    # side is behind and has to be advanced alone
                    branch_old = '.'.join(["%03i" % ientry, d_old[2][0]])
                    branch_new = '.'.join(["%03i" % ientry, d_new[2][0]])
                    if branch_old < branch_new:
                        if not _is_summary():
                            print '::sync-old skipping entry'
                        summary[d_old[2][0]] += 1
                        fnew.allgood = False
                        read_new = False
                    elif branch_old > branch_new:
                        if not _is_summary():
                            print '::sync-new skipping entry'
                        summary[d_new[2][0]] += 1
                        fold.allgood = False
                        read_old = False
                    else:
                        # MN: difference in the leaves
                        # NOTE(review): prev_d_old / prev_d_new are still
                        # None if this branch is reached on the very first
                        # pair read; leafname_fromdump(None) would then
                        # raise -- confirm whether that can happen
                        prev_leaf_old = leafname_fromdump(prev_d_old)
                        prev_leaf_new = leafname_fromdump(prev_d_new)
                        leaf_old = leafname_fromdump(d_old)
                        leaf_new = leafname_fromdump(d_new)
                        if prev_leaf_old == prev_leaf_new:
                            # array size difference?
                            if leaf_old == leaf_new and leaf_old == prev_leaf_old:
                                # could be a size difference in >1 dim arrays
                                # hard to sync, skipping both
                                pass
                            elif leaf_old == prev_leaf_old:
                                # old has bigger array, skip old entry
                                read_new = False
                                if not _is_summary():
                                    print '::sync-old skipping entry'
                                summary[leaf_old] += 1
                            elif leaf_new == prev_leaf_new:
                                # new has bigger array, skip new entry
                                read_old = False
                                if not _is_summary():
                                    print '::sync-new skipping entry'
                                summary[leaf_new] += 1

                        # neither side was held back: advance both
                        if read_old and read_new:
                            summary[d_new[2][0]] += 1
                            if not _is_summary():
                                print '::sync-old+new skipping both entries'
                        fold.allgood = False
                        fnew.allgood = False

                if _is_exit_early():
                    print "*** exit on first error ***"
                    break
                continue

            # same position in both dumps, different value
            n = '.'.join(map(str, ["%03i" % ientry] + name))
            diff_value = 'N/A'
            try:
                # difference relative to the mean of both values, in percent;
                # stays 'N/A' if the arithmetic fails
                diff_value = 50. * (iold - inew) / (iold + inew)
                diff_value = '%.8f%%' % (diff_value, )
            except Exception:
                pass
            if not _is_summary():
                print '%s %r -> %r => diff= [%s]' % (n, iold, inew, diff_value)
                pass
            summary[leafname_fromdump(d_old)] += 1

            # a difference in an enforced leaf stops the comparison
            if name[0] in args.enforce_leaves:
                msg.info("don't compare further")
                all_good = False
                break
            pass  # loop over events/branches

        msg.info('Found [%s] identical leaves', n_good)
        msg.info('Found [%s] different leaves', n_bad)

        if not _is_summary():
            keys = sorted(summary.keys())
            for n in keys:
                v = summary[n]
                msg.info(' [%s]: %i leaves differ', n, v)
                pass
            pass

        if (not fold.allgood) or (not fnew.allgood):
            msg.info('NOTE: there were errors during the dump')
            msg.info('fold.allgood: %s' % fold.allgood)
            msg.info('fnew.allgood: %s' % fnew.allgood)
            # makes the result non-zero so that main() reports a difference
            n_bad += 0.5
        return n_bad

    ndiff = diff_tree(fold, fnew, args)
    if ndiff != 0:
        msg.info('files differ!')
        return 2
    msg.info('all good.')
    return 0
Beispiel #10
0
def main(args):
    """Diff two ROOT files (containers and sizes).

    `args` is an options object; the attributes read here are ``old``,
    ``new``, ``tree_name``, ``entries``, ``verbose``, ``ignore_leaves``,
    ``enforce_leaves``, ``leaves_prefix``, ``known_hacks``, ``mode``,
    ``error_mode``, ``order_trees`` and ``nan_equal``.  It is also stored in
    the module-level ``g_args``.

    Returns 0 when no difference was counted, 2 otherwise.
    """

    global g_args
    g_args = args

    import PyUtils.RootUtils as ru
    root = ru.import_root()  # noqa: F841

    import PyUtils.Logging as L
    msg = L.logging.getLogger('diff-root')
    if args.verbose:
        msg.setLevel(L.logging.VERBOSE)
    else:
        msg.setLevel(L.logging.INFO)

    from PyUtils.Helpers import ShutUp  # noqa: F401

    # an empty string is treated like -1; diff_tree() then compares over
    # the smaller of the two entry counts
    if args.entries == '':
        args.entries = -1

    msg.info('comparing tree [%s] in files:', args.tree_name)
    msg.info(' old: [%s]', args.old)
    msg.info(' new: [%s]', args.new)
    msg.info('ignore  leaves: %s', args.ignore_leaves)
    msg.info('enforce leaves: %s', args.enforce_leaves)
    msg.info('leaves prefix:  %s', args.leaves_prefix)
    msg.info('hacks:          %s', args.known_hacks)
    msg.info('entries:        %s', args.entries)
    msg.info('mode:           %s', args.mode)
    msg.info('error mode:     %s', args.error_mode)
    msg.info('order trees:    %s', args.order_trees)

    import PyUtils.Helpers as H
    with H.ShutUp():
        fold = ru.RootFileDumper(args.old, args.tree_name)
        fnew = ru.RootFileDumper(args.new, args.tree_name)
        pass

    def tree_infos(tree, args):
        """Return {'entries': count, 'leaves': set of branch names} for
        `tree`.  Names listed in ``args.ignore_leaves`` are left out and
        ``args.leaves_prefix``, if set, is removed from the remaining names."""
        nentries = tree.GetEntriesFast()
        # l.GetBranch().GetName() gives the full leaf path name
        leaves = [
            l.GetBranch().GetName() for l in tree.GetListOfLeaves()
            if l.GetBranch().GetName() not in args.ignore_leaves
        ]
        if args.leaves_prefix:
            leaves = [l.replace(args.leaves_prefix, '') for l in leaves]
        return {
            'entries': nentries,
            'leaves': set(leaves),
        }

    def ordered_indices(tree, reverse_order=False):
        """Return the entry indices of `tree` sorted by event number.

        Every entry is loaded and the event number is taken from the first
        event-info attribute found on the tree, tried in the order listed
        below.  If none is present an error is logged and the scan stops, so
        only the indices collected so far are returned.
        """
        from collections import OrderedDict
        import operator

        dict_in = {}
        nevts = tree.GetEntriesFast()

        for idx in range(0, nevts):
            if idx % 100 == 0:
                msg.debug('Read {} events from the input so far'.format(idx))
            tree.GetEntry(idx)
            if hasattr(tree, 'xAOD::EventAuxInfo_v2_EventInfoAux.'):
                event_info = getattr(tree,
                                     'xAOD::EventAuxInfo_v2_EventInfoAux.')
                event_number = event_info.eventNumber
            elif hasattr(tree, 'xAOD::EventAuxInfo_v1_EventInfoAux.'):
                event_info = getattr(tree,
                                     'xAOD::EventAuxInfo_v1_EventInfoAux.')
                event_number = event_info.eventNumber
            elif hasattr(tree, 'EventInfoAux.'):
                event_info = getattr(tree, 'EventInfoAux.')
                event_number = event_info.eventNumber
            elif hasattr(tree, 'EventInfo_p4_McEventInfo'):
                event_info = getattr(tree, 'EventInfo_p4_McEventInfo')
                event_number = event_info.m_event_ID.m_event_number
            elif hasattr(tree, 'EventInfo_p4_ByteStreamEventInfo'):
                event_info = getattr(tree, 'EventInfo_p4_ByteStreamEventInfo')
                event_number = event_info.m_event_ID.m_event_number
            elif hasattr(tree, 'ByteStreamEventInfo'):
                event_info = getattr(tree, 'ByteStreamEventInfo')
                event_number = event_info.m_event_ID.m_event_number
            else:
                msg.error('Cannot read event info, will bail out.')
                break
            msg.debug('Idx : EvtNum {:10d} : {}'.format(idx, event_number))
            dict_in[idx] = event_number

        # Sort the dictionary by event numbers
        dict_out = OrderedDict(
            sorted(dict_in.items(),
                   key=operator.itemgetter(1),
                   reverse=reverse_order))

        # Write out the ordered index list
        return [idx for idx in dict_out]

    def diff_tree(fold, fnew, args):
        """Walk the dumps of `fold` and `fnew` in parallel and count the
        differing values.

        Returns the number of differences; 0.5 is added when either dumper
        ends up with ``allgood`` false, so that the result is non-zero.
        """
        infos = {
            'old': tree_infos(fold.tree, args),
            'new': tree_infos(fnew.tree, args),
        }

        nentries = min(infos['old']['entries'], infos['new']['entries'])
        itr_entries = nentries
        if args.entries in (-1, '', '-1'):
            #msg.info('comparing over [%s] entries...', nentries)
            itr_entries = nentries
            if infos['old']['entries'] != infos['new']['entries']:
                msg.info('different numbers of entries:')
                msg.info(' old: [%s]', infos['old']['entries'])
                msg.info(' new: [%s]', infos['new']['entries'])
                msg.info('=> comparing [%s] first entries...', nentries)
        else:
            # value supplied by the caller is passed through unchanged
            itr_entries = args.entries
            pass
        msg.info('comparing over [%s] entries...', itr_entries)

        # leaves present in only one of the two files are reported and
        # then excluded from the comparison
        old_leaves = infos['old']['leaves'] - infos['new']['leaves']
        if old_leaves:
            msg.warning('the following variables exist only in the old file !')
            for l in old_leaves:
                msg.warning(' - [%s]', l)
        new_leaves = infos['new']['leaves'] - infos['old']['leaves']
        if new_leaves:
            msg.warning('the following variables exist only in the new file !')
            for l in new_leaves:
                msg.warning(' - [%s]', l)

        # need to remove trailing dots as they confuse reach_next()
        skip_leaves = [
            l.rstrip('.')
            for l in old_leaves | new_leaves | set(args.ignore_leaves)
        ]
        for l in skip_leaves:
            msg.debug('skipping [%s]', l)

        leaves = infos['old']['leaves'] & infos['new']['leaves']
        msg.info('comparing [%s] leaves over entries...', len(leaves))
        n_good = 0
        n_bad = 0
        import collections
        # leaf (or branch) name -> number of differences recorded for it
        summary = collections.defaultdict(int)

        if args.order_trees:
            # compare events in event-number order: each dumper gets an
            # explicit list of entry indices, cut to the requested count
            slice_max = int(itr_entries) if int(itr_entries) > 0 else None
            itr_entries_old = ordered_indices(fold.tree)[0:slice_max]
            itr_entries_new = ordered_indices(fnew.tree)[0:slice_max]
            msg.debug('List of old indices {}'.format(itr_entries_old))
            msg.debug('List of new indices {}'.format(itr_entries_new))
        else:
            itr_entries_old = itr_entries
            itr_entries_new = itr_entries

        old_dump_iter = fold.dump(args.tree_name, itr_entries_old)
        new_dump_iter = fnew.dump(args.tree_name, itr_entries_new)

        def leafname_fromdump(entry):
            """Join the non-numeric components of entry[2] with dots."""
            return '.'.join([s for s in entry[2] if not s.isdigit()])

        def reach_next(dump_iter, skip_leaves, leaves_prefix=None):
            """Return the next dump entry that is not to be skipped, or None
            once the iterator is exhausted.

            An entry is skipped when a dotted prefix of its leaf name, or any
            single component of that name, is in `skip_leaves`.  If
            `leaves_prefix` is given it is removed from the branch name.
            """
            keep_reading = True
            while keep_reading:
                try:
                    entry = next(dump_iter)
                except StopIteration:
                    return None
                entry[2][0] = entry[2][0].rstrip('.\0')  # clean branch name
                if leaves_prefix:
                    entry[2][0] = entry[2][0].replace(leaves_prefix, '')
                name = []
                skip = False
                for n in leafname_fromdump(entry).split('.'):
                    name.append(n)
                    if '.'.join(name) in skip_leaves or n in skip_leaves:
                        skip = True
                        break
                if not skip:
                    return entry
                # print('SKIP:', leafname_fromdump(entry))
            pass

        # read_old / read_new say which iterator is advanced on the next
        # pass; one of them is held back while re-synchronising
        read_old = True
        read_new = True
        d_old = None
        d_new = None

        while True:
            if read_old:
                prev_d_old = d_old
                d_old = reach_next(old_dump_iter, skip_leaves,
                                   args.leaves_prefix)
            if read_new:
                prev_d_new = d_new
                d_new = reach_next(new_dump_iter, skip_leaves,
                                   args.leaves_prefix)

            # both dumps exhausted
            if not d_new and not d_old:
                break

            read_old = True
            read_new = True

            # with ordered trees only the values (last element) are
            # compared here; otherwise the whole entry has to match
            if (args.order_trees and d_old and d_new
                    and d_old[-1] == d_new[-1]) or d_old == d_new:
                n_good += 1
                continue

            # a dump entry unpacks as (tree name, entry index, name path,
            # value); when both sides exist tree_name and name come from
            # the 'new' entry
            if d_old:
                tree_name, ientry, name, iold = d_old
            if d_new:
                tree_name, jentry, name, inew = d_new

            # for regression testing we should have NAN == NAN
            # NOTE(review): if one side is None, iold or inew is either
            # unbound or left over from an earlier iteration -- confirm
            # that this check is safe in that case
            if args.nan_equal:
                if all(
                    [isinstance(x, Real) and isnan(x) for x in [iold, inew]]):
                    n_good += 1
                    continue

            # FIXME: that's a plain (temporary?) hack
            if name[-1] in args.known_hacks:
                continue

            n_bad += 1

            if not args.order_trees:
                # in synch: everything but the value matches
                in_synch = d_old and d_new and d_old[:-1] == d_new[:-1]
            else:
                # entry indices legitimately differ between ordered trees,
                # so only the tree name and the name path are compared
                in_synch = d_old and d_new and d_old[0] == d_new[0] and d_old[
                    2] == d_new[2]
            if not in_synch:
                if _is_detailed():
                    if d_old:
                        print('::sync-old %s' %
                              '.'.join(["%03i" % ientry] +
                                       list(map(str, d_old[2]))))
                    else:
                        print('::sync-old ABSENT')
                    if d_new:
                        print('::sync-new %s' %
                              '.'.join(["%03i" % jentry] +
                                       list(map(str, d_new[2]))))
                    else:
                        print('::sync-new ABSENT')
                    pass
                # remember for later
                if not d_old:
                    fold.allgood = False
                    summary[d_new[2][0]] += 1
                elif not d_new:
                    fnew.allgood = False
                    summary[d_old[2][0]] += 1
                else:
                    # compare "<entry>.<branch>" strings to decide which
                    # side is behind and has to be advanced alone
                    branch_old = '.'.join(["%03i" % ientry, d_old[2][0]])
                    branch_new = '.'.join(["%03i" % jentry, d_new[2][0]])
                    if branch_old < branch_new:
                        if _is_detailed():
                            print('::sync-old skipping entry')
                        summary[d_old[2][0]] += 1
                        fnew.allgood = False
                        read_new = False
                    elif branch_old > branch_new:
                        if _is_detailed():
                            print('::sync-new skipping entry')
                        summary[d_new[2][0]] += 1
                        fold.allgood = False
                        read_old = False
                    else:
                        # MN: difference in the leaves
                        # NOTE(review): prev_d_old / prev_d_new are still
                        # None if this branch is reached on the very first
                        # pair read; leafname_fromdump(None) would then
                        # raise -- confirm whether that can happen
                        prev_leaf_old = leafname_fromdump(prev_d_old)
                        prev_leaf_new = leafname_fromdump(prev_d_new)
                        leaf_old = leafname_fromdump(d_old)
                        leaf_new = leafname_fromdump(d_new)
                        if prev_leaf_old == prev_leaf_new:
                            # array size difference?
                            if leaf_old == leaf_new and leaf_old == prev_leaf_old:
                                # could be a size difference in >1 dim arrays
                                # hard to sync, skipping both
                                pass
                            elif leaf_old == prev_leaf_old:
                                # old has bigger array, skip old entry
                                read_new = False
                                if _is_detailed():
                                    print('::sync-old skipping entry')
                                summary[leaf_old] += 1
                            elif leaf_new == prev_leaf_new:
                                # new has bigger array, skip new entry
                                read_old = False
                                if _is_detailed():
                                    print('::sync-new skipping entry')
                                summary[leaf_new] += 1

                        # neither side was held back: advance both
                        if read_old and read_new:
                            summary[d_new[2][0]] += 1
                            if _is_detailed():
                                print('::sync-old+new skipping both entries')
                        fold.allgood = False
                        fnew.allgood = False

                if _is_exit_early():
                    print('*** exit on first error ***')
                    break
                continue

            # same position in both dumps, different value
            if not args.order_trees:
                n = '.'.join(list(map(str, ["%03i" % ientry] + name)))
            else:
                # show both entry indices since they may differ
                n = '.'.join(
                    list(map(str, ["%03i.%03i" % (ientry, jentry)] + name)))
            diff_value = 'N/A'
            try:
                # difference relative to the mean of both values, in percent;
                # stays 'N/A' if the arithmetic fails
                diff_value = 50. * (iold - inew) / (iold + inew)
                diff_value = '%.8f%%' % (diff_value, )
            except Exception:
                pass
            if _is_detailed():
                print('%s %r -> %r => diff= [%s]' %
                      (n, iold, inew, diff_value))
                pass
            summary[leafname_fromdump(d_old)] += 1

            # a difference in an enforced leaf stops the comparison
            if name[0] in args.enforce_leaves:
                msg.info("don't compare further")
                break
            pass  # loop over events/branches

        msg.info('Found [%s] identical leaves', n_good)
        msg.info('Found [%s] different leaves', n_bad)

        if not _is_summary():
            keys = sorted(summary.keys())
            for n in keys:
                v = summary[n]
                msg.info(' [%s]: %i leaves differ', n, v)
                pass
            pass

        if (not fold.allgood) or (not fnew.allgood):
            msg.info('NOTE: there were errors during the dump')
            msg.info('fold.allgood: %s', fold.allgood)
            msg.info('fnew.allgood: %s', fnew.allgood)
            # makes the result non-zero so that main() reports a difference
            n_bad += 0.5
        return n_bad

    ndiff = diff_tree(fold, fnew, args)
    if ndiff != 0:
        msg.info('files differ!')
        return 2
    msg.info('all good.')
    return 0
Beispiel #11
0
def main(args):
    """Check that two ROOT files have the same content (containers and sizes).

    The tree ``args.tree_name`` is dumped from ``args.old`` and ``args.new``
    and the two dumps are compared item by item.

    Attributes read from ``args``: ``tree_name``, ``old``, ``new``,
    ``ignore_leaves``, ``enforce_leaves``, ``known_hacks``, ``entries``,
    ``mode`` and ``error_mode``.  An empty ``entries`` string is replaced
    by ``-1`` (compare all common entries).

    Returns:
        0 when no difference was found, 2 otherwise.
    """
    global g_args
    g_args = args

    import PyUtils.RootUtils as ru
    # The returned module object is not used in this function.
    ru.import_root()

    import PyUtils.Logging as L
    msg = L.logging.getLogger('diff-root')
    msg.setLevel(L.logging.INFO)

    if args.entries == '':
        args.entries = -1

    msg.info('comparing tree [%s] in files:', args.tree_name)
    msg.info(' old: [%s]', args.old)
    msg.info(' new: [%s]', args.new)
    msg.info('ignore  leaves: %s', args.ignore_leaves)
    msg.info('enforce leaves: %s', args.enforce_leaves)
    msg.info('hacks:          %s', args.known_hacks)
    msg.info('entries:        %s', args.entries)
    msg.info('mode:           %s', args.mode)
    msg.info('error mode:     %s', args.error_mode)

    import PyUtils.Helpers as H
    with H.ShutUp():
        fold = ru.RootFileDumper(args.old, args.tree_name)
        fnew = ru.RootFileDumper(args.new, args.tree_name)

    def tree_infos(tree, args):
        """Return the entry count and the set of non-ignored leaf names."""
        nentries = tree.GetEntriesFast()
        # Filter on the leaf *name*: `args.ignore_leaves` holds names, so
        # testing the leaf object itself would never match.
        leaf_names = [leaf.GetName() for leaf in tree.GetListOfLeaves()]
        return {
            'entries': nentries,
            'leaves': set(name for name in leaf_names
                          if name not in args.ignore_leaves),
        }

    def diff_tree(fold, fnew, args):
        """Compare the dumps of both files; return the number of differences.

        The returned count is increased by 0.5 when either dumper was
        flagged as not "allgood", so that it is non-zero in that case.
        """
        infos = {
            'old': tree_infos(fold.tree, args),
            'new': tree_infos(fnew.tree, args),
        }

        nentries = min(infos['old']['entries'],
                       infos['new']['entries'])
        if args.entries in (-1, '', '-1'):
            itr_entries = nentries
            if infos['old']['entries'] != infos['new']['entries']:
                msg.info('different numbers of entries:')
                msg.info(' old: [%s]', infos['old']['entries'])
                msg.info(' new: [%s]', infos['new']['entries'])
                msg.info('=> comparing [%s] first entries...', nentries)
        else:
            itr_entries = args.entries
        msg.info('comparing over [%s] entries...', itr_entries)

        leaves = infos['old']['leaves'] & infos['new']['leaves']
        # Symmetric difference: report leaves missing from *either* tree,
        # not only those missing from the new one.
        diff_leaves = infos['old']['leaves'] ^ infos['new']['leaves']
        if diff_leaves:
            msg.info('the following variables exist in only one tree !')
            for leaf in sorted(diff_leaves):
                msg.info(' - [%s]', leaf)
        leaves = leaves - set(args.ignore_leaves)

        msg.info('comparing [%s] leaves over entries...', len(leaves))
        n_good = 0
        n_bad = 0
        import collections
        try:
            from itertools import izip  # Python 2
        except ImportError:
            izip = zip  # Python 3: the builtin zip is already lazy
        summary = collections.defaultdict(int)
        for d in izip(fold.dump(args.tree_name, itr_entries),
                      fnew.dump(args.tree_name, itr_entries)):
            _, ientry, name, iold = d[0]
            _, _, _, inew = d[1]
            # NOTE(review): `name` is the list held by d[0], so this strips
            # the old entry in place while d[1] is left untouched -- confirm
            # the asymmetry is intended.
            name[0] = name[0].rstrip('\0')
            # FIXME: the known_hacks test is a plain (temporary?) hack
            if name[0] not in leaves or name[-1] in args.known_hacks:
                continue

            if d[0] == d[1]:
                n_good += 1
                continue
            n_bad += 1

            # Everything but the value (last item) must match for the two
            # dumps to be describing the same thing.
            in_synch = d[0][:-1] == d[1][:-1]
            if not in_synch:
                if not _is_summary():
                    print('::sync-old %s' %
                          '.'.join(["%03i" % ientry] +
                                   [str(x) for x in d[0][2]]))
                    print('::sync-new %s' %
                          '.'.join(["%03i" % ientry] +
                                   [str(x) for x in d[1][2]]))
                summary[name[0]] += 1
                # remember for later
                fold.allgood = False
                fnew.allgood = False

                if _is_exit_early():
                    print("*** exit on first error ***")
                    break
                continue

            n = '.'.join(["%03i" % ientry] + [str(x) for x in name])
            diff_value = 'N/A'
            try:
                diff_value = 50. * (iold - inew) / (iold + inew)
                diff_value = '%.8f%%' % (diff_value,)
            except Exception:
                # Best effort: values may be non-numeric or sum to zero;
                # keep the 'N/A' placeholder in that case.
                pass
            if not _is_summary():
                print('%s %r -> %r => diff= [%s]' %
                      (n, iold, inew, diff_value))
            summary[name[0]] += 1

            if name[0] in args.enforce_leaves:
                msg.info("don't compare further")
                break

        msg.info('Found [%s] identical leaves', n_good)
        msg.info('Found [%s] different leaves', n_bad)

        if not _is_summary():
            for leaf in sorted(summary.keys()):
                msg.info(' [%s]: %i leaves differ', leaf, summary[leaf])

        if (not fold.allgood) or (not fnew.allgood):
            msg.info('NOTE: there were errors during the dump')
            msg.info('fold.allgood: %s', fold.allgood)
            msg.info('fnew.allgood: %s', fnew.allgood)
            n_bad += 0.5
        return n_bad

    ndiff = diff_tree(fold, fnew, args)
    if ndiff != 0:
        msg.info('files differ!')
        return 2
    msg.info('all good.')
    return 0
Beispiel #12
0
def _pythonize_tfile():
    """Attach ``read``, ``seek`` and ``tell`` methods to ``ROOT.TFile``.

    Loads the ``RootUtilsPyROOTDict`` dictionary (with the known ROOT
    warnings filtered out), then installs:

    * ``TFile.read``  -- implemented on top of
      ``RootUtils._pythonize_read_root_file``;
    * ``TFile.seek``  -- alias of ``TFile.Seek``;
    * ``TFile.tell``  -- wrapper around
      ``RootUtils._pythonize_tell_root_file``.
    """
    import cppyy
    root = import_root()
    import PyUtils.Helpers as H
    with H.ShutUp(filters=[
        re.compile(
            'TClass::TClass:0: RuntimeWarning: no dictionary for.*'),
        re.compile(
            'Warning in <TEnvRec::ChangeValue>: duplicate entry.*'
            ),
        ]):
        cppyy.loadDict("RootUtilsPyROOTDict")
        rootutils = getattr(root, "RootUtils")
        pybytes        = getattr(rootutils, "PyBytes")  # noqa: F841
        #MN: lines below fail in ROOT6 if PCM from RootUtils is not found
        read_root_file = getattr(rootutils, "_pythonize_read_root_file")
        tell_root_file = getattr(rootutils, "_pythonize_tell_root_file")

    def _buffer_to_bytes(c_buf):
        """Copy the content of ``c_buf.buf`` into a native bytes object."""
        # FIXME: Once we drop py2, we can simplify this by using a bytes
        # object directly instead of PyBytes.
        v = c_buf.buf
        if six.PY3:
            return bytes([ord(v[i]) for i in range(v.size())])
        return ''.join([v[i] for i in range(v.size())])

    def read(self, size=-1):
        """read([size]) -> read at most size bytes, returned as bytes.

        If the size argument is negative or omitted, read until EOF is reached.
        Notice that when in non-blocking mode, less data than what was requested
        may be returned, even if no size parameter was given.
        An empty bytes object is returned when nothing could be read.

        FIXME: probably doesn't follow python file-like conventions...
        """
        SZ = 4096

        if size >= 0:
            c_buf = read_root_file(self, size)
            if c_buf and c_buf.sz:
                return _buffer_to_bytes(c_buf)
            # b'' is '' on Python 2 and keeps the return type consistent
            # (bytes) on Python 3.
            return b''

        # Negative size: read chunk by chunk until an empty read.
        out = []
        while True:
            c_buf = read_root_file(self, SZ)
            if not (c_buf and c_buf.sz):
                break
            out.append(_buffer_to_bytes(c_buf))
        return b''.join(out)

    root.TFile.read = read
    del read

    root.TFile.seek = root.TFile.Seek
    root.TFile.tell = lambda self: tell_root_file(self)
    return