def getRunsInTheFile(filenameOrFile, baseRun="/"):
        """
        Gets all the Runs in the file (ie, all the tables of all runs.)

        """
        closeTheFile = False
        if isinstance(filenameOrFile, File):
            hdfFile = filenameOrFile
        else:
            closeTheFile = True
            try:
                hdfFile = openFile(filenameOrFile, mode="a")
            except:
                return []
        baseRun = SofiePyTableAccess._createNiceRunName(baseRun)
        try:
            theGroup = hdfFile.getNode(baseRun)
            if not isinstance(theGroup, Group):
                return [theGroup._v_pathname]
            runs = [node._v_pathname for node in hdfFile.walkNodes(baseRun) if isinstance(node, Table)]
            logging.debug("The Runs: " + str(runs))
            if closeTheFile:
                hdfFile.close()
            return runs
        except NoSuchNodeError:
            logging.warning("Path does not exist: " + baseRun)
            if closeTheFile:
                hdfFile.close()
            return []
 def __init__(self, filename, runName="/test", allowDuplicates=False, description=""):
     self.table = None
     """ Of Type Node/Group."""
     self.run = None
     extensionOut = os.path.splitext(filename)
     filename = extensionOut[0] + ".h5"
     self.theTableName = None
     self.theTableStructure = None
     self.hdfFile = openFile(filename, mode="a")
     self.filename = filename
     self.__allowDuplicates = allowDuplicates
     logging.debug("Setting the Run: " + str(self.getRuns()))
     self.setRunName(runName)
     logging.debug("Setting the RunMeta" + str(self.getRuns()))
     self.setRunMeta(runName, description)
     logging.debug("Finished Setting the RunMeta" + str(self.getRuns()))
Example #3
0
def copyLeaf(srcfile, dstfile, srcnode, dstnode, title,
             filters, copyuserattrs, overwritefile, overwrtnodes, stats,
             start, stop, step, chunkshape, sortby, checkCSI,
             propindexes, upgradeflavors):
    # Open the source file
    srcfileh = openFile(srcfile, 'r')
    # Get the source node (that should exist)
    srcNode = srcfileh.getNode(srcnode)

    # Get the destination node and its parent
    last_slash = dstnode.rindex('/')
    if last_slash == len(dstnode)-1:
        # print "Detected a trailing slash in destination node. Interpreting it as a destination group."
        dstgroup = dstnode[:-1]
    elif last_slash > 0:
        dstgroup = dstnode[:last_slash]
    else:
        dstgroup = "/"
    dstleaf = dstnode[last_slash+1:]
    if dstleaf == "":
        dstleaf = srcNode.name
    # Check whether the destination group exists or not
    if os.path.isfile(dstfile) and not overwritefile:
        dstfileh = openFile(dstfile, 'a', PYTABLES_SYS_ATTRS=createsysattrs)
        try:
            dstGroup = dstfileh.getNode(dstgroup)
        except:
            # The dstgroup does not seem to exist. Try creating it.
            dstGroup = newdstGroup(dstfileh, dstgroup, title, filters)
        else:
            # The node exists, but it is really a group?
            if not isinstance(dstGroup, Group):
                # No. Should we overwrite it?
                if overwrtnodes:
                    parent = dstGroup._v_parent
                    last_slash = dstGroup._v_pathname.rindex('/')
                    dstgroupname = dstGroup._v_pathname[last_slash+1:]
                    dstGroup.remove()
                    dstGroup = dstfileh.createGroup(parent, dstgroupname,
                                                    title=title,
                                                    filters=filters)
                else:
                    raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."
    else:
        # The destination file does not exist or will be overwritten.
        dstfileh = openFile(dstfile, 'w', title=title, filters=filters,
                            PYTABLES_SYS_ATTRS=createsysattrs)
        dstGroup = newdstGroup(dstfileh, dstgroup, title="", filters=filters)

    # Finally, copy srcNode to dstNode
    try:
        dstNode = srcNode.copy(
            dstGroup, dstleaf, filters = filters,
            copyuserattrs = copyuserattrs, overwrite = overwrtnodes,
            stats = stats, start = start, stop = stop, step = step,
            chunkshape = chunkshape,
            sortby = sortby, checkCSI = checkCSI, propindexes = propindexes)
    except:
        (type, value, traceback) = sys.exc_info()
        print "Problems doing the copy from '%s:%s' to '%s:%s'" % \
              (srcfile, srcnode, dstfile, dstnode)
        print "The error was --> %s: %s" % (type, value)
        print "The destination file looks like:\n", dstfileh
        # Close all the open files:
        srcfileh.close()
        dstfileh.close()
        raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."

    # Upgrade flavors in dstNode, if required
    if upgradeflavors and srcfileh.format_version.startswith("1"):
        # Remove original flavor in case the source file has 1.x format
        dstNode.delAttr('FLAVOR')

    # Recreate possible old indexes in destination node
    if srcNode._c_classId == "TABLE":
        recreateIndexes(srcNode, dstfileh, dstNode)

    # Close all the open files:
    srcfileh.close()
    dstfileh.close()
Example #4
0
def main():
    global verbose
    global regoldindexes
    global createsysattrs

    usage = """usage: %s [-h] [-v] [-o] [-R start,stop,step] [--non-recursive] [--dest-title=title] [--dont-create-sysattrs] [--dont-copy-userattrs] [--overwrite-nodes] [--complevel=(0-9)] [--complib=lib] [--shuffle=(0|1)] [--fletcher32=(0|1)] [--keep-source-filters] [--chunkshape=value] [--upgrade-flavors] [--dont-regenerate-old-indexes] [--sortby=column] [--checkCSI] [--propindexes] sourcefile:sourcegroup destfile:destgroup
     -h -- Print usage message.
     -v -- Show more information.
     -o -- Overwrite destination file.
     -R RANGE -- Select a RANGE of rows (in the form "start,stop,step")
         during the copy of *all* the leaves.  Default values are
         "None,None,1", which means a copy of all the rows.
     --non-recursive -- Do not do a recursive copy. Default is to do it.
     --dest-title=title -- Title for the new file (if not specified,
         the source is copied).
     --dont-create-sysattrs -- Do not create sys attrs (default is to do it).
     --dont-copy-userattrs -- Do not copy the user attrs (default is to do it).
     --overwrite-nodes -- Overwrite destination nodes if they exist. Default is
         to not overwrite them.
     --complevel=(0-9) -- Set a compression level (0 for no compression, which
         is the default).
     --complib=lib -- Set the compression library to be used during the copy.
         lib can be set to "zlib", "lzo", "bzip2" or "blosc".  Defaults to
         "zlib".
     --shuffle=(0|1) -- Activate or not the shuffling filter (default is active
         if complevel>0).
     --fletcher32=(0|1) -- Whether to activate or not the fletcher32 filter
        (not active by default).
     --keep-source-filters -- Use the original filters in source files. The
         default is not doing that if any of --complevel, --complib, --shuffle
         or --fletcher32 option is specified.
     --chunkshape=("keep"|"auto"|int|tuple) -- Set a chunkshape.  A value
         of "auto" computes a sensible value for the chunkshape of the
         leaves copied.  The default is to "keep" the original value.
     --upgrade-flavors -- When repacking PyTables 1.x files, the flavor of
         leaves will be unset. With this, such a leaves will be serialized
         as objects with the internal flavor ('numpy' for 2.x series).
     --dont-regenerate-old-indexes -- Disable regenerating old indexes. The
         default is to regenerate old indexes as they are found.
     --sortby=column -- Do a table copy sorted by the index in "column".
         For reversing the order, use a negative value in the "step" part of
         "RANGE" (see "-R" flag).  Only applies to table objects.
     --checkCSI -- Force the check for a CSI index for the --sortby column.
     --propindexes -- Propagate the indexes existing in original tables.  The
         default is to not propagate them.  Only applies to table objects.
    \n""" % os.path.basename(sys.argv[0])


    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'hvoR:',
                                    ['non-recursive',
                                     'dest-title=',
                                     'dont-create-sysattrs',
                                     'dont-copy-userattrs',
                                     'overwrite-nodes',
                                     'complevel=',
                                     'complib=',
                                     'shuffle=',
                                     'fletcher32=',
                                     'keep-source-filters',
                                     'chunkshape=',
                                     'upgrade-flavors',
                                     'dont-regenerate-old-indexes',
                                     'sortby=',
                                     'checkCSI',
                                     'propindexes',
                                     ])
    except:
        (type, value, traceback) = sys.exc_info()
        print "Error parsing the options. The error was:", value
        sys.stderr.write(usage)
        sys.exit(0)

    # default options
    overwritefile = False
    keepfilters = False
    chunkshape = "keep"
    complevel = None
    complib = None
    shuffle = None
    fletcher32 = None
    title = ""
    copyuserattrs = True
    rng = None
    recursive = True
    overwrtnodes = False
    upgradeflavors = False
    sortby = None
    checkCSI = False
    propindexes = False

    # Get the options
    for option in opts:
        if option[0] == '-h':
            sys.stderr.write(usage)
            sys.exit(0)
        elif option[0] == '-v':
            verbose = True
        elif option[0] == '-o':
            overwritefile = True
        elif option[0] == '-R':
            try:
                rng = eval("slice("+option[1]+")")
            except:
                print "Error when getting the range parameter."
                (type, value, traceback) = sys.exc_info()
                print "  The error was:", value
                sys.stderr.write(usage)
                sys.exit(0)
        elif option[0] == '--dest-title':
            title = option[1]
        elif option[0] == '--dont-create-sysattrs':
            createsysattrs = False
        elif option[0] == '--dont-copy-userattrs':
            copyuserattrs = False
        elif option[0] == '--non-recursive':
            recursive = False
        elif option[0] == '--overwrite-nodes':
            overwrtnodes = True
        elif option[0] == '--keep-source-filters':
            keepfilters = True
        elif option[0] == '--chunkshape':
             chunkshape = option[1]
             if chunkshape.isdigit() or chunkshape.startswith('('):
                 chunkshape = eval(chunkshape)
        elif option[0] == '--upgrade-flavors':
            upgradeflavors = True
        elif option[0] == '--dont-regenerate-old-indexes':
            regoldindexes = False
        elif option[0] == '--complevel':
            complevel = int(option[1])
        elif option[0] == '--complib':
            complib = option[1]
        elif option[0] == '--shuffle':
            shuffle = int(option[1])
        elif option[0] == '--fletcher32':
            fletcher32 = int(option[1])
        elif option[0] == '--sortby':
            sortby = option[1]
        elif option[0] == '--propindexes':
            propindexes = True
        elif option[0] == '--checkCSI':
            checkCSI = True
        else:
            print option[0], ": Unrecognized option"
            sys.stderr.write(usage)
            sys.exit(0)

    # if we pass a number of files different from 2, abort
    if len(pargs) <> 2:
        print "You need to pass both source and destination!."
        sys.stderr.write(usage)
        sys.exit(0)

    # Catch the files passed as the last arguments
    src = pargs[0].split(':')
    dst = pargs[1].split(':')
    if len(src) == 1:
        srcfile, srcnode = src[0], "/"
    else:
        srcfile, srcnode = src
    if len(dst) == 1:
        dstfile, dstnode = dst[0], "/"
    else:
        dstfile, dstnode = dst

    if srcnode == "":
        # case where filename == "filename:" instead of "filename:/"
        srcnode = "/"

    if dstnode == "":
        # case where filename == "filename:" instead of "filename:/"
        dstnode = "/"

    # Ignore the warnings for tables that contains oldindexes
    # (these will be handled by the copying routines)
    warnings.filterwarnings("ignore", category=OldIndexWarning)
    # Let the user be warned in case he is using ptrepack when copying
    # files with indexes
    #warnings.filterwarnings("ignore", category=NoIndexingWarning)
    # Ignore the flavors warnings during upgrading flavor operations
    if upgradeflavors:
        warnings.filterwarnings("ignore", category=FlavorWarning)

    # Build the Filters instance
    if ((complevel, complib, shuffle, fletcher32) == (None,)*4 or keepfilters):
        filters = None
    else:
        if complevel is None: complevel = 0
        if shuffle is None:
            if complevel > 0:
                shuffle = True
            else:
                shuffle = False
        if complib is None: complib = "zlib"
        if fletcher32 is None: fletcher32 = False
        filters = Filters(complevel=complevel, complib=complib,
                          shuffle=shuffle, fletcher32=fletcher32)

    # The start, stop and step params:
    start, stop, step = None, None, 1  # Defaults
    if rng:
        start, stop, step = rng.start, rng.stop, rng.step

    # Some timing
    t1 = time.time()
    cpu1 = time.clock()
    # Copy the file
    if verbose:
        print "+=+"*20
        print "Recursive copy:", recursive
        print "Applying filters:", filters
        if sortby is not None:
            print "Sorting table(s) by column:", sortby
            print "Forcing a CSI creation:", checkCSI
        if propindexes:
            print "Recreating indexes in copied table(s)"
        print "Start copying %s:%s to %s:%s" % (srcfile, srcnode,
                                                dstfile, dstnode)
        print "+=+"*20

    # Check whether the specified source node is a group or a leaf
    h5srcfile = openFile(srcfile, 'r')
    srcnodeobject = h5srcfile.getNode(srcnode)
    objectclass = srcnodeobject.__class__.__name__
    # Close the file again
    h5srcfile.close()

    stats = {'groups': 0, 'leaves': 0, 'links': 0, 'bytes': 0}
    if isinstance(srcnodeobject, Group):
        copyChildren(
            srcfile, dstfile, srcnode, dstnode,
            title = title, recursive = recursive, filters = filters,
            copyuserattrs = copyuserattrs, overwritefile = overwritefile,
            overwrtnodes = overwrtnodes, stats = stats,
            start = start, stop = stop, step = step, chunkshape = chunkshape,
            sortby = sortby, checkCSI = checkCSI, propindexes = propindexes,
            upgradeflavors=upgradeflavors)
    else:
        # If not a Group, it should be a Leaf
        copyLeaf(
            srcfile, dstfile, srcnode, dstnode,
            title = title, filters = filters, copyuserattrs = copyuserattrs,
            overwritefile = overwritefile, overwrtnodes = overwrtnodes,
            stats = stats, start = start, stop = stop, step = step,
            chunkshape = chunkshape,
            sortby = sortby, checkCSI = checkCSI, propindexes = propindexes,
            upgradeflavors=upgradeflavors)

    # Gather some statistics
    t2 = time.time()
    cpu2 = time.clock()
    tcopy = round(t2-t1, 3)
    cpucopy = round(cpu2-cpu1, 3)
    tpercent = int(round(cpucopy/tcopy, 2)*100)

    if verbose:
        ngroups = stats['groups']
        nleaves = stats['leaves']
        nlinks = stats['links']
        nbytescopied = stats['bytes']
        nnodes = ngroups + nleaves + nlinks

        print \
              "Groups copied:", ngroups, \
              " Leaves copied:", nleaves, \
              " Links copied:", nlinks
        if copyuserattrs:
            print "User attrs copied"
        else:
            print "User attrs not copied"
        print "KBytes copied:", round(nbytescopied/1024.,3)
        print "Time copying: %s s (real) %s s (cpu)  %s%%" % \
              (tcopy, cpucopy, tpercent)
        print "Copied nodes/sec: ", round((nnodes) / float(tcopy),1)
        print "Copied KB/s :", int(nbytescopied / (tcopy * 1024))
Example #5
0
def copyChildren(srcfile, dstfile, srcgroup, dstgroup, title,
                 recursive, filters, copyuserattrs, overwritefile,
                 overwrtnodes, stats, start, stop, step,
                 chunkshape, sortby, checkCSI, propindexes,
                 upgradeflavors):
    "Copy the children from source group to destination group"
    # Open the source file with srcgroup as rootUEP
    srcfileh = openFile(srcfile, 'r', rootUEP=srcgroup)
    #  Assign the root to srcGroup
    srcGroup = srcfileh.root

    created_dstGroup = False
    # Check whether the destination group exists or not
    if os.path.isfile(dstfile) and not overwritefile:
        dstfileh = openFile(dstfile, 'a', PYTABLES_SYS_ATTRS=createsysattrs)
        try:
            dstGroup = dstfileh.getNode(dstgroup)
        except:
            # The dstgroup does not seem to exist. Try creating it.
            dstGroup = newdstGroup(dstfileh, dstgroup, title, filters)
            created_dstGroup = True
        else:
            # The node exists, but it is really a group?
            if not isinstance(dstGroup, Group):
                # No. Should we overwrite it?
                if overwrtnodes:
                    parent = dstGroup._v_parent
                    last_slash = dstGroup._v_pathname.rindex('/')
                    dstgroupname = dstGroup._v_pathname[last_slash+1:]
                    dstGroup.remove()
                    dstGroup = dstfileh.createGroup(parent, dstgroupname,
                                                    title=title,
                                                    filters=filters)
                else:
                    raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."
    else:
        # The destination file does not exist or will be overwritten.
        dstfileh = openFile(dstfile, 'w', title=title, filters=filters,
                            PYTABLES_SYS_ATTRS=createsysattrs)
        dstGroup = newdstGroup(dstfileh, dstgroup, title="", filters=filters)
        created_dstGroup = True

    # Copy the attributes to dstGroup, if needed
    if created_dstGroup and copyuserattrs:
        srcGroup._v_attrs._f_copy(dstGroup)

    # Finally, copy srcGroup children to dstGroup
    try:
        srcGroup._f_copyChildren(
            dstGroup, recursive = recursive, filters = filters,
            copyuserattrs = copyuserattrs, overwrite = overwrtnodes,
            stats = stats, start = start, stop = stop, step = step,
            chunkshape = chunkshape,
            sortby = sortby, checkCSI = checkCSI, propindexes = propindexes)
    except:
        (type, value, traceback) = sys.exc_info()
        print "Problems doing the copy from '%s:%s' to '%s:%s'" % \
              (srcfile, srcgroup, dstfile, dstgroup)
        print "The error was --> %s: %s" % (type, value)
        print "The destination file looks like:\n", dstfileh
        # Close all the open files:
        srcfileh.close()
        dstfileh.close()
        raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired. In particular, pay attention that rootUEP is not fooling you."

    # Upgrade flavors in dstNode, if required
    if upgradeflavors and srcfileh.format_version.startswith("1"):
        for dstNode in dstGroup._f_walkNodes("Leaf"):
            # Remove original flavor in case the source file has 1.x format
            dstNode.delAttr('FLAVOR')

    # Convert the remaining tables with old indexes (if any)
    for table in srcGroup._f_walkNodes("Table"):
        dsttable = dstfileh.getNode(dstGroup, table._v_pathname)
        recreateIndexes(table, dstfileh, dsttable)

    # Close all the open files:
    srcfileh.close()
    dstfileh.close()
Example #6
0
def main():
    usage = \
    """usage: %s [-d] [-v] [-a] [-c] [-i] [-R start,stop,step] [-h] file[:nodepath]
      -d -- Dump data information on leaves
      -v -- Dump more metainformation on nodes
      -a -- Show attributes in nodes (only useful when -v or -d are active)
      -c -- Show info of columns in tables (only useful when -v or -d are active)
      -i -- Show info of indexed columns (only useful when -v or -d are active)
      -R RANGE -- Select a RANGE of rows in the form "start,stop,step"
      -h -- Print help on usage
                \n""" \
    % os.path.basename(sys.argv[0])

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'R:ahdvci')
    except:
        sys.stderr.write(usage)
        sys.exit(0)

    # if we pass too much parameters, abort
    if len(pargs) <> 1:
        sys.stderr.write(usage)
        sys.exit(0)

    # Get the options
    for option in opts:
        if option[0] == '-R':
            options.dump = 1
            try:
                options.rng = eval("slice(" + option[1] + ")")
            except:
                print "Error when getting the range parameter."
                (type, value, traceback) = sys.exc_info()
                print "  The error was:", value
                sys.stderr.write(usage)
                sys.exit(0)

        elif option[0] == '-a':
            options.showattrs = 1
        elif option[0] == '-h':
            sys.stderr.write(usage)
            sys.exit(0)
        elif option[0] == '-v':
            options.verbose = 1
        elif option[0] == '-d':
            options.dump = 1
        elif option[0] == '-c':
            options.colinfo = 1
        elif option[0] == '-i':
            options.idxinfo = 1
        else:
            print option[0], ": Unrecognized option"
            sys.stderr.write(usage)
            sys.exit(0)

    # Catch the files passed as the last arguments
    src = pargs[0].split(':')
    if len(src) == 1:
        filename, nodename = src[0], "/"
    else:
        filename, nodename = src
        if nodename == "":
            # case where filename == "filename:" instead of "filename:/"
            nodename = "/"

    # Check whether the specified node is a group or a leaf
    h5file = openFile(filename, 'r')
    nodeobject = h5file.getNode(nodename)
    if isinstance(nodeobject, Group):
        # Close the file again and reopen using the rootUEP
        dumpGroup(nodeobject)
    elif isinstance(nodeobject, Leaf):
        # If it is not a Group, it must be a Leaf
        dumpLeaf(nodeobject)
    else:
        # This should never happen
        print "Unrecognized object:", nodeobject

    # Close the file
    h5file.close()
Example #7
0
def main():
    usage = \
    """usage: %s [-d] [-v] [-a] [-c] [-i] [-R start,stop,step] [-h] file[:nodepath]
      -d -- Dump data information on leaves
      -v -- Dump more metainformation on nodes
      -a -- Show attributes in nodes (only useful when -v or -d are active)
      -c -- Show info of columns in tables (only useful when -v or -d are active)
      -i -- Show info of indexed columns (only useful when -v or -d are active)
      -R RANGE -- Select a RANGE of rows in the form "start,stop,step"
      -h -- Print help on usage
                \n""" \
    % os.path.basename(sys.argv[0])

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'R:ahdvci')
    except:
        sys.stderr.write(usage)
        sys.exit(0)

    # if we pass too much parameters, abort
    if len(pargs) != 1:
        sys.stderr.write(usage)
        sys.exit(0)

    # Get the options
    for option in opts:
        if option[0] == '-R':
            options.dump = 1
            try:
                options.rng = eval("slice("+option[1]+")")
            except:
                print "Error when getting the range parameter."
                (type, value, traceback) = sys.exc_info()
                print "  The error was:", value
                sys.stderr.write(usage)
                sys.exit(0)

        elif option[0] == '-a':
            options.showattrs = 1
        elif option[0] == '-h':
            sys.stderr.write(usage)
            sys.exit(0)
        elif option[0] == '-v':
            options.verbose = 1
        elif option[0] == '-d':
            options.dump = 1
        elif option[0] == '-c':
            options.colinfo = 1
        elif option[0] == '-i':
            options.idxinfo = 1
        else:
            print option[0], ": Unrecognized option"
            sys.stderr.write(usage)
            sys.exit(0)

    # Catch the files passed as the last arguments
    src = pargs[0].split(':')
    if len(src) == 1:
        filename, nodename = src[0], "/"
    else:
        filename, nodename = src
        if nodename == "":
            # case where filename == "filename:" instead of "filename:/"
            nodename = "/"

    # Check whether the specified node is a group or a leaf
    h5file = openFile(filename, 'r')
    nodeobject = h5file.getNode(nodename)
    if isinstance(nodeobject, Group):
        # Close the file again and reopen using the rootUEP
        dumpGroup(nodeobject)
    elif isinstance(nodeobject, Leaf):
        # If it is not a Group, it must be a Leaf
        dumpLeaf(nodeobject)
    else:
        # This should never happen
        print "Unrecognized object:", nodeobject

    # Close the file
    h5file.close()
def copyLeaf(srcfile, dstfile, srcnode, dstnode, title,
             filters, copyuserattrs, overwritefile, overwrtnodes, stats,
             start, stop, step, chunkshape, sortby, forceCSI,
             propindexes, upgradeflavors):
    # Open the source file
    if forceCSI:
        srcfileh = openFile(srcfile, 'a')
    else:
        srcfileh = openFile(srcfile, 'r')
    # Get the source node (that should exist)
    srcNode = srcfileh.getNode(srcnode)

    # Get the destination node and its parent
    last_slash = dstnode.rindex('/')
    if last_slash == len(dstnode)-1:
        # print "Detected a trailing slash in destination node. Interpreting it as a destination group."
        dstgroup = dstnode[:-1]
    elif last_slash > 0:
        dstgroup = dstnode[:last_slash]
    else:
        dstgroup = "/"
    dstleaf = dstnode[last_slash+1:]
    if dstleaf == "":
        dstleaf = srcNode.name
    # Check whether the destination group exists or not
    if os.path.isfile(dstfile) and not overwritefile:
        dstfileh = openFile(dstfile, 'a')
        try:
            dstGroup = dstfileh.getNode(dstgroup)
        except:
            # The dstgroup does not seem to exist. Try creating it.
            dstGroup = newdstGroup(dstfileh, dstgroup, title, filters)
        else:
            # The node exists, but it is really a group?
            if not isinstance(dstGroup, Group):
                # No. Should we overwrite it?
                if overwrtnodes:
                    parent = dstGroup._v_parent
                    last_slash = dstGroup._v_pathname.rindex('/')
                    dstgroupname = dstGroup._v_pathname[last_slash+1:]
                    dstGroup.remove()
                    dstGroup = dstfileh.createGroup(parent, dstgroupname,
                                                    title=title,
                                                    filters=filters)
                else:
                    raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."
    else:
        # The destination file does not exist or will be overwritten.
        dstfileh = openFile(dstfile, 'w', title=title, filters=filters)
        dstGroup = newdstGroup(dstfileh, dstgroup, title="", filters=filters)

    # Finally, copy srcNode to dstNode
    try:
        dstNode = srcNode.copy(
            dstGroup, dstleaf, filters = filters,
            copyuserattrs = copyuserattrs, overwrite = overwrtnodes,
            stats = stats, start = start, stop = stop, step = step,
            chunkshape = chunkshape,
            sortby = sortby, forceCSI = forceCSI, propindexes = propindexes)
    except:
        (type, value, traceback) = sys.exc_info()
        print "Problems doing the copy from '%s:%s' to '%s:%s'" % \
              (srcfile, srcnode, dstfile, dstnode)
        print "The error was --> %s: %s" % (type, value)
        print "The destination file looks like:\n", dstfileh
        # Close all the open files:
        srcfileh.close()
        dstfileh.close()
        raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."

    # Upgrade flavors in dstNode, if required
    if upgradeflavors and srcfileh.format_version.startswith("1"):
        # Remove original flavor in case the source file has 1.x format
        dstNode.delAttr('FLAVOR')

    # Recreate possible old indexes in destination node
    if srcNode._c_classId == "TABLE":
        recreateIndexes(srcNode, dstfileh, dstNode)

    # Close all the open files:
    srcfileh.close()
    dstfileh.close()
def main():
    global verbose
    global regoldindexes

    usage = """usage: %s [-h] [-v] [-o] [-R start,stop,step] [--non-recursive] [--dest-title=title] [--dont-copyuser-attrs] [--overwrite-nodes] [--complevel=(0-9)] [--complib=lib] [--shuffle=(0|1)] [--fletcher32=(0|1)] [--keep-source-filters] [--chunkshape=value] [--upgrade-flavors] [--dont-regenerate-old-indexes] [--sortby=column] [--forceCSI] [--propindexes] sourcefile:sourcegroup destfile:destgroup
     -h -- Print usage message.
     -v -- Show more information.
     -o -- Overwite destination file.
     -R RANGE -- Select a RANGE of rows (in the form "start,stop,step")
         during the copy of *all* the leaves.  Default values are
         "None,None,1", which means a copy of all the rows.
     --non-recursive -- Do not do a recursive copy. Default is to do it.
     --dest-title=title -- Title for the new file (if not specified,
         the source is copied).
     --dont-copy-userattrs -- Do not copy the user attrs (default is to do it)
     --overwrite-nodes -- Overwrite destination nodes if they exist. Default is
         to not overwrite them.
     --complevel=(0-9) -- Set a compression level (0 for no compression, which
         is the default).
     --complib=lib -- Set the compression library to be used during the copy.
         lib can be set to "zlib", "lzo" or "bzip2". Defaults to "zlib".
     --shuffle=(0|1) -- Activate or not the shuffling filter (default is active
         if complevel>0).
     --fletcher32=(0|1) -- Whether to activate or not the fletcher32 filter
        (not active by default).
     --keep-source-filters -- Use the original filters in source files. The
         default is not doing that if any of --complevel, --complib, --shuffle
         or --fletcher32 option is specified.
     --chunkshape=("keep"|"auto"|int|tuple) -- Set a chunkshape.  A value
         of "auto" computes a sensible value for the chunkshape of the
         leaves copied.  The default is to "keep" the original value.
     --upgrade-flavors -- When repacking PyTables 1.x files, the flavor of
         leaves will be unset. With this, such a leaves will be serialized
         as objects with the internal flavor ('numpy' for 2.x series).
     --dont-regenerate-old-indexes -- Disable regenerating old indexes. The
         default is to regenerate old indexes as they are found.
     --sortby=column -- Do a table copy sorted by the values of "column".
         This requires an existing index in "column".  For reversing the order,
         use a negative value in the "step" part of "RANGE" (see "-R" flag).
         Only applies to table objects.
     --forceCSI -- Force the creation of a CSI index in case one is not
         available for the --sortby column (this implies the modification of
         the *source* file).  The default is to not create it.
     --propindexes -- Propagate the indexes existing in original tables.  The
         default is to not propagate them.  Only applies to table objects.
    \n""" % os.path.basename(sys.argv[0])


    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'hvoR:',
                                    ['non-recursive',
                                     'dest-title=',
                                     'dont-copy-userattrs',
                                     'overwrite-nodes',
                                     'complevel=',
                                     'complib=',
                                     'shuffle=',
                                     'fletcher32=',
                                     'keep-source-filters',
                                     'chunkshape=',
                                     'upgrade-flavors',
                                     'dont-regenerate-old-indexes',
                                     'sortby=',
                                     'forceCSI',
                                     'propindexes',
                                     ])
    except:
        (type, value, traceback) = sys.exc_info()
        print "Error parsing the options. The error was:", value
        sys.stderr.write(usage)
        sys.exit(0)

    # default options
    overwritefile = False
    keepfilters = False
    chunkshape = "keep"
    complevel = None
    complib = None
    shuffle = None
    fletcher32 = None
    title = ""
    copyuserattrs = True
    rng = None
    recursive = True
    overwrtnodes = False
    upgradeflavors = False
    sortby = None
    forceCSI = False
    propindexes = False

    # Get the options
    for option in opts:
        if option[0] == '-h':
            sys.stderr.write(usage)
            sys.exit(0)
        elif option[0] == '-v':
            verbose = True
        elif option[0] == '-o':
            overwritefile = True
        elif option[0] == '-R':
            try:
                rng = eval("slice("+option[1]+")")
            except:
                print "Error when getting the range parameter."
                (type, value, traceback) = sys.exc_info()
                print "  The error was:", value
                sys.stderr.write(usage)
                sys.exit(0)
        elif option[0] == '--dest-title':
            title = option[1]
        elif option[0] == '--dont-copy-userattrs':
            copyuserattrs = False
        elif option[0] == '--non-recursive':
            recursive = False
        elif option[0] == '--overwrite-nodes':
            overwrtnodes = True
        elif option[0] == '--keep-source-filters':
            keepfilters = True
        elif option[0] == '--chunkshape':
             chunkshape = option[1]
             if chunkshape.isdigit() or chunkshape.startswith('('):
                 chunkshape = eval(chunkshape)
        elif option[0] == '--upgrade-flavors':
            upgradeflavors = True
        elif option[0] == '--dont-regenerate-old-indexes':
            regoldindexes = False
        elif option[0] == '--complevel':
            complevel = int(option[1])
        elif option[0] == '--complib':
            complib = option[1]
        elif option[0] == '--shuffle':
            shuffle = int(option[1])
        elif option[0] == '--fletcher32':
            fletcher32 = int(option[1])
        elif option[0] == '--sortby':
            sortby = option[1]
        elif option[0] == '--propindexes':
            propindexes = True
        elif option[0] == '--forceCSI':
            forceCSI = True
        else:
            print option[0], ": Unrecognized option"
            sys.stderr.write(usage)
            sys.exit(0)

    # if we pass a number of files different from 2, abort
    if len(pargs) <> 2:
        print "You need to pass both source and destination!."
        sys.stderr.write(usage)
        sys.exit(0)

    # Catch the files passed as the last arguments
    src = pargs[0].split(':')
    dst = pargs[1].split(':')
    if len(src) == 1:
        srcfile, srcnode = src[0], "/"
    else:
        srcfile, srcnode = src
    if len(dst) == 1:
        dstfile, dstnode = dst[0], "/"
    else:
        dstfile, dstnode = dst

    if srcnode == "":
        # case where filename == "filename:" instead of "filename:/"
        srcnode = "/"

    if dstnode == "":
        # case where filename == "filename:" instead of "filename:/"
        dstnode = "/"

    # Ignore the warnings for tables that contains oldindexes
    # (these will be handled by the copying routines)
    warnings.filterwarnings("ignore", category=OldIndexWarning)
    # Let the user be warned in case he is using ptrepack when copying
    # files with indexes
    #warnings.filterwarnings("ignore", category=NoIndexingWarning)
    # Ignore the flavors warnings during upgrading flavor operations
    if upgradeflavors:
        warnings.filterwarnings("ignore", category=FlavorWarning)

    # Build the Filters instance
    if ((complevel, complib, shuffle, fletcher32) == (None,)*4 or keepfilters):
        filters = None
    else:
        if complevel is None: complevel = 0
        if shuffle is None:
            if complevel > 0:
                shuffle = True
            else:
                shuffle = False
        if complib is None: complib = "zlib"
        if fletcher32 is None: fletcher32 = False
        filters = Filters(complevel=complevel, complib=complib,
                          shuffle=shuffle, fletcher32=fletcher32)

    # The start, stop and step params:
    start, stop, step = None, None, 1  # Defaults
    if rng:
        start, stop, step = rng.start, rng.stop, rng.step

    # Some timing
    t1 = time.time()
    cpu1 = time.clock()
    # Copy the file
    if verbose:
        print "+=+"*20
        print "Recursive copy:", recursive
        print "Applying filters:", filters
        if sortby is not None:
            print "Sorting table(s) by column:", sortby
            print "Forcing a CSI creation:", forceCSI
        if propindexes:
            print "Recreating indexes in copied table(s)"
        print "Start copying %s:%s to %s:%s" % (srcfile, srcnode,
                                                dstfile, dstnode)
        print "+=+"*20

    # Check whether the specified source node is a group or a leaf
    h5srcfile = openFile(srcfile, 'r')
    srcnodeobject = h5srcfile.getNode(srcnode)
    objectclass = srcnodeobject.__class__.__name__
    # Close the file again
    h5srcfile.close()

    stats = {'groups': 0, 'leaves': 0, 'bytes': 0}
    if isinstance(srcnodeobject, Group):
        copyChildren(
            srcfile, dstfile, srcnode, dstnode,
            title = title, recursive = recursive, filters = filters,
            copyuserattrs = copyuserattrs, overwritefile = overwritefile,
            overwrtnodes = overwrtnodes, stats = stats,
            start = start, stop = stop, step = step, chunkshape = chunkshape,
            sortby = sortby, forceCSI = forceCSI, propindexes = propindexes,
            upgradeflavors=upgradeflavors)
    else:
        # If not a Group, it should be a Leaf
        copyLeaf(
            srcfile, dstfile, srcnode, dstnode,
            title = title, filters = filters, copyuserattrs = copyuserattrs,
            overwritefile = overwritefile, overwrtnodes = overwrtnodes,
            stats = stats, start = start, stop = stop, step = step,
            chunkshape = chunkshape,
            sortby = sortby, forceCSI = forceCSI, propindexes = propindexes,
            upgradeflavors=upgradeflavors)

    # Gather some statistics
    t2 = time.time()
    cpu2 = time.clock()
    tcopy = round(t2-t1, 3)
    cpucopy = round(cpu2-cpu1, 3)
    tpercent = int(round(cpucopy/tcopy, 2)*100)

    if verbose:
        ngroups = stats['groups']
        nleafs = stats['leaves']
        nbytescopied = stats['bytes']

        print "Groups copied:", ngroups, " Leaves copied:", nleafs
        if copyuserattrs:
            print "User attrs copied"
        else:
            print "User attrs not copied"
        print "KBytes copied:", round(nbytescopied/1024.,3)
        print "Time copying: %s s (real) %s s (cpu)  %s%%" % \
              (tcopy, cpucopy, tpercent)
        print "Copied nodes/sec: ", round((ngroups+nleafs) / float(tcopy),1)
        print "Copied KB/s :", int(nbytescopied / (tcopy * 1024))
def copyChildren(srcfile, dstfile, srcgroup, dstgroup, title,
                 recursive, filters, copyuserattrs, overwritefile,
                 overwrtnodes, stats, start, stop, step,
                 chunkshape, sortby, forceCSI, propindexes,
                 upgradeflavors):
    "Copy the children from source group to destination group"
    # Open the source file with srcgroup as rootUEP
    if forceCSI:
        srcfileh = openFile(srcfile, 'a', rootUEP=srcgroup)
    else:
        srcfileh = openFile(srcfile, 'r', rootUEP=srcgroup)
    #  Assign the root to srcGroup
    srcGroup = srcfileh.root

    created_dstGroup = False
    # Check whether the destination group exists or not
    if os.path.isfile(dstfile) and not overwritefile:
        dstfileh = openFile(dstfile, 'a')
        try:
            dstGroup = dstfileh.getNode(dstgroup)
        except:
            # The dstgroup does not seem to exist. Try creating it.
            dstGroup = newdstGroup(dstfileh, dstgroup, title, filters)
            created_dstGroup = True
        else:
            # The node exists, but it is really a group?
            if not isinstance(dstGroup, Group):
                # No. Should we overwrite it?
                if overwrtnodes:
                    parent = dstGroup._v_parent
                    last_slash = dstGroup._v_pathname.rindex('/')
                    dstgroupname = dstGroup._v_pathname[last_slash+1:]
                    dstGroup.remove()
                    dstGroup = dstfileh.createGroup(parent, dstgroupname,
                                                    title=title,
                                                    filters=filters)
                else:
                    raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."
    else:
        # The destination file does not exist or will be overwritten.
        dstfileh = openFile(dstfile, 'w', title=title, filters=filters)
        dstGroup = newdstGroup(dstfileh, dstgroup, title="", filters=filters)
        created_dstGroup = True

    # Copy the attributes to dstGroup, if needed
    if created_dstGroup and copyuserattrs:
        srcGroup._v_attrs._f_copy(dstGroup)

    # Finally, copy srcGroup children to dstGroup
    try:
        srcGroup._f_copyChildren(
            dstGroup, recursive = recursive, filters = filters,
            copyuserattrs = copyuserattrs, overwrite = overwrtnodes,
            stats = stats, start = start, stop = stop, step = step,
            chunkshape = chunkshape,
            sortby = sortby, forceCSI = forceCSI, propindexes = propindexes)
    except:
        (type, value, traceback) = sys.exc_info()
        print "Problems doing the copy from '%s:%s' to '%s:%s'" % \
              (srcfile, srcgroup, dstfile, dstgroup)
        print "The error was --> %s: %s" % (type, value)
        print "The destination file looks like:\n", dstfileh
        # Close all the open files:
        srcfileh.close()
        dstfileh.close()
        raise RuntimeError, "Please check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired. In particular, pay attention that rootUEP is not fooling you."

    # Upgrade flavors in dstNode, if required
    if upgradeflavors and srcfileh.format_version.startswith("1"):
        for dstNode in dstGroup._f_walkNodes("Leaf"):
            # Remove original flavor in case the source file has 1.x format
            dstNode.delAttr('FLAVOR')

    # Convert the remaining tables with old indexes (if any)
    for table in srcGroup._f_walkNodes("Table"):
        dsttable = dstfileh.getNode(dstGroup, table._v_pathname)
        recreateIndexes(table, dstfileh, dsttable)

    # Close all the open files:
    srcfileh.close()
    dstfileh.close()
Example #11
0
''' Take two arrays, merge them together after translating the labels which are in the first 
    column of each array
'''
def combine_arrays(foci_array,shape_array):
    foci_labels = foci_array[:,0]
    shape_labels = shape_array[:,0]
    new_labels = np.array([label_dict[t] for t in zip(foci_labels,shape_labels)])
    new_labels.shape = (len(new_labels),1)
    return np.hstack((new_labels,foci_array[:,(1,2)]))
    
    
        

# Open and prepare an hdf5 file, adding a labels group 
filename = options.filename
h5file = openFile(filename, mode = "a", title = "Data File")
labels_group = h5file.createGroup("/", 'labels', 'The labels and object IDs')
zlib_filters = Filters(complib='zlib', complevel=5)

# Go to the files location in the filesystem.
shape_input = options.labels_indir
foci_input = options.foci_indir
cur_dir = getcwd()

try:
    foci_files = read_filenames(foci_input,'.out')
    shape_files = read_filenames(shape_input,'.label')
except:
    print "Could not read files from one of " + foci_input + ", " + shape_input
    sys.exit(1)