Example #1
0
def main():
    """Command-line entry point: dump one node of a file.

    The command line is parsed straight into the module-level ``options``
    namespace.  The positional source has the form ``filename[:nodepath]``;
    a missing or empty node path selects the root node (``"/"``).

    Returns
    -------
    str or None
        A message string when the input file cannot be opened, ``None``
        otherwise.
    """
    parser = _get_parser()
    args = parser.parse_args(namespace=options)

    # A range that is still a string is converted into a slice object.
    # NOTE(review): eval() executes arbitrary expressions taken from the
    # command line; acceptable only while the caller is trusted.
    if isinstance(args.rng, str):
        try:
            options.rng = eval("slice(" + args.rng + ")")
        except Exception:
            parser.error("Error when getting the range parameter.")
        else:
            # A usable range was supplied: switch the dump option on as well
            args.dump = 1

    # Separate "filename:nodepath" on the last colon only
    filename, colon, nodename = args.src.rpartition(':')
    if not colon:
        # No colon at all: the whole argument is the file name
        filename, nodename = args.src, "/"
    elif not nodename:
        # "filename:" is treated like "filename:/"
        nodename = "/"

    try:
        h5file = open_file(filename, 'r')
    except Exception as e:
        return 'Cannot open input file: ' + str(e)

    with h5file:
        # Dispatch on the kind of node that was requested
        target = h5file.get_node(nodename)
        if isinstance(target, Group):
            dump_group(target, args.sort)
        elif isinstance(target, Leaf):
            dump_leaf(target)
        else:
            # This should never happen
            print("Unrecognized object:", target)
Example #2
0
def main():
    """Command-line entry point: dump one node of a file.

    The command line is parsed into the module-level ``options`` namespace.
    The positional source has the form ``filename[:nodepath]``; a missing or
    empty node path selects the root node (``"/"``).

    Returns
    -------
    str or None
        A message string when the input file cannot be opened, ``None``
        otherwise.
    """
    parser = _get_parser()

    args = parser.parse_args(namespace=options)

    # Get the options
    if isinstance(args.rng, six.string_types):
        try:
            # NOTE(review): eval() executes arbitrary expressions taken from
            # the command line; acceptable only while the caller is trusted.
            options.rng = eval("slice(" + args.rng + ")")
        except Exception:
            parser.error("Error when getting the range parameter.")
        else:
            args.dump = 1

    # Catch the files passed as the last arguments.
    # Split on the LAST colon only: a file name may itself contain colons
    # (e.g. a Windows drive letter), and a plain split(':') would then yield
    # more than two parts and make the unpacking below raise ValueError.
    src = args.src.rsplit(':', 1)
    if len(src) == 1:
        filename, nodename = src[0], "/"
    else:
        filename, nodename = src
        if nodename == "":
            # case where filename == "filename:" instead of "filename:/"
            nodename = "/"

    try:
        h5file = open_file(filename, 'r')
    except Exception as e:
        return 'Cannot open input file: ' + str(e)

    with h5file:
        # Check whether the specified node is a group or a leaf
        nodeobject = h5file.get_node(nodename)
        if isinstance(nodeobject, Group):
            dump_group(nodeobject, args.sort)
        elif isinstance(nodeobject, Leaf):
            # If it is not a Group, it must be a Leaf
            dump_leaf(nodeobject)
        else:
            # This should never happen
            print("Unrecognized object:", nodeobject)
Example #3
0
def copy_leaf(srcfile, dstfile, srcnode, dstnode, title, filters,
              copyuserattrs, overwritefile, overwrtnodes, stats, start, stop,
              step, chunkshape, sortby, check_CSI, propindexes,
              upgradeflavors):
    """Copy a single leaf from ``srcfile`` into ``dstfile``.

    ``srcnode`` is the path of the leaf inside the source file and
    ``dstnode`` the destination path.  A trailing slash in ``dstnode`` names
    the destination group only; the copy then keeps the source leaf's name.
    When ``dstfile`` exists and ``overwritefile`` is false it is opened in
    append mode, otherwise it is (re)created.  A missing destination group
    is created through ``newdst_group``.  The remaining keyword-like
    parameters are forwarded unchanged to ``srcnode.copy``.

    Both files are closed before returning, whether the copy succeeds or
    fails.

    Raises
    ------
    RuntimeError
        If the destination group path exists but is not a group and
        ``overwrtnodes`` is false, or if the copy itself fails.
    """
    srcfileh = open_file(srcfile, 'r')
    dstfileh = None
    try:
        # Get the source node (that should exist)
        srcnode = srcfileh.get_node(srcnode)

        # Get the destination node and its parent
        last_slash = dstnode.rindex('/')
        if last_slash == len(dstnode) - 1:
            # Trailing slash: the whole path names the destination group
            dstgroup = dstnode[:-1]
        elif last_slash > 0:
            dstgroup = dstnode[:last_slash]
        else:
            dstgroup = "/"
        dstleaf = dstnode[last_slash + 1:]
        if dstleaf == "":
            dstleaf = srcnode.name

        # Check whether the destination group exists or not
        if os.path.isfile(dstfile) and not overwritefile:
            dstfileh = open_file(dstfile, 'a',
                                 pytables_sys_attrs=createsysattrs)
            try:
                dstgroup = dstfileh.get_node(dstgroup)
            except Exception:
                # The dstgroup does not seem to exist. Try creating it.
                # (Exception rather than a bare except, so that
                # KeyboardInterrupt/SystemExit are not swallowed here.)
                dstgroup = newdst_group(dstfileh, dstgroup, title, filters)
            else:
                # The node exists, but it is really a group?
                if not isinstance(dstgroup, Group):
                    # No. Should we overwrite it?
                    if overwrtnodes:
                        parent = dstgroup._v_parent
                        last_slash = dstgroup._v_pathname.rindex('/')
                        dstgroupname = dstgroup._v_pathname[last_slash + 1:]
                        dstgroup.remove()
                        dstgroup = dstfileh.create_group(parent,
                                                         dstgroupname,
                                                         title=title,
                                                         filters=filters)
                    else:
                        raise RuntimeError(
                            "Please check that the node names are "
                            "not duplicated in destination, and "
                            "if so, add the --overwrite-nodes "
                            "flag if desired.")
        else:
            # The destination file does not exist or will be overwritten.
            dstfileh = open_file(dstfile,
                                 'w',
                                 title=title,
                                 filters=filters,
                                 pytables_sys_attrs=createsysattrs)
            dstgroup = newdst_group(dstfileh, dstgroup, title="",
                                    filters=filters)

        # Finally, copy srcnode to dstnode
        try:
            dstnode = srcnode.copy(dstgroup,
                                   dstleaf,
                                   filters=filters,
                                   copyuserattrs=copyuserattrs,
                                   overwrite=overwrtnodes,
                                   stats=stats,
                                   start=start,
                                   stop=stop,
                                   step=step,
                                   chunkshape=chunkshape,
                                   sortby=sortby,
                                   check_CSI=check_CSI,
                                   propindexes=propindexes)
        except Exception as exc:
            print("Problems doing the copy from '%s:%s' to '%s:%s'" %
                  (srcfile, srcnode, dstfile, dstnode))
            print("The error was --> %s: %s" % (type(exc), exc))
            print("The destination file looks like:\n", dstfileh)
            # The files are closed by the outer ``finally`` clause.
            raise RuntimeError("Please check that the node names are not "
                               "duplicated in destination, and if so, add "
                               "the --overwrite-nodes flag if desired.")

        # Upgrade flavors in dstnode, if required
        if upgradeflavors:
            if srcfileh.format_version.startswith("1"):
                # Remove original flavor in case the source file has 1.x
                # format
                dstnode.del_attr('FLAVOR')
            elif srcfileh.format_version < "2.1":
                if dstnode.get_attr('FLAVOR') in numpy_aliases:
                    dstnode.set_attr('FLAVOR', internal_flavor)

        # Recreate possible old indexes in destination node
        if srcnode._c_classid == "TABLE":
            recreate_indexes(srcnode, dstfileh, dstnode)
    finally:
        # Close all the open files, on success and on failure alike
        try:
            srcfileh.close()
        finally:
            if dstfileh is not None:
                dstfileh.close()
Example #4
0
def main():
    """Command-line entry point: copy a node from one file to another.

    Parses the command line, validates the options, builds the ``Filters``
    instance (or ``None``), publishes ``verbose``, ``regoldindexes`` and
    ``createsysattrs`` as module globals, and then delegates to
    ``copy_children`` when the source node is a group or to ``copy_leaf``
    otherwise.  Timing and copy statistics are printed in verbose mode.

    Source and destination are given as ``filename[:nodepath]``; a missing
    or empty node path means the root node (``"/"``).
    """
    global verbose
    global regoldindexes
    global createsysattrs

    parser = _get_parser()
    args = parser.parse_args()

    # check arguments
    if args.rng:
        try:
            # NOTE(review): eval() executes arbitrary expressions taken from
            # the command line; acceptable only while the caller is trusted.
            args.rng = eval("slice(" + args.rng + ")")
        except Exception:
            parser.error("Error when getting the range parameter.")

    if args.chunkshape.isdigit() or args.chunkshape.startswith('('):
        # NOTE(review): same eval() caveat as for the range above.
        args.chunkshape = eval(args.chunkshape)

    # ``None`` (option not given) is handled further below, so only range
    # check real values; comparing None with an int fails on Python 3.
    if args.complevel is not None and not 0 <= args.complevel <= 9:
        parser.error(
            'invalid "complevel" value, it should be in the range [0, 9]')

    # Catch the files passed as the last arguments
    src = args.src.rsplit(':', 1)
    dst = args.dst.rsplit(':', 1)
    if len(src) == 1:
        srcfile, srcnode = src[0], "/"
    else:
        srcfile, srcnode = src
    if len(dst) == 1:
        dstfile, dstnode = dst[0], "/"
    else:
        dstfile, dstnode = dst

    if srcnode == "":
        # case where filename == "filename:" instead of "filename:/"
        srcnode = "/"

    if dstnode == "":
        # case where filename == "filename:" instead of "filename:/"
        dstnode = "/"

    # Ignore the warnings for tables that contains oldindexes
    # (these will be handled by the copying routines)
    warnings.filterwarnings("ignore", category=OldIndexWarning)

    # Ignore the flavors warnings during upgrading flavor operations
    if args.upgradeflavors:
        warnings.filterwarnings("ignore", category=FlavorWarning)

    # Build the Filters instance
    filter_params = (
        args.complevel,
        args.complib,
        args.shuffle,
        args.bitshuffle,
        args.fletcher32,
    )
    # Compare against a tuple of the SAME length: with five parameters the
    # previous ``(None,) * 4`` could never be equal.
    if filter_params == (None, ) * len(filter_params) or args.keepfilters:
        filters = None
    else:
        if args.complevel is None:
            args.complevel = 0
        if args.shuffle is None:
            # Shuffle defaults to on only when compression is active
            args.shuffle = args.complevel > 0
        if args.bitshuffle is None:
            args.bitshuffle = False
        if args.bitshuffle:
            # Shuffle and bitshuffle are mutually exclusive
            args.shuffle = False
        if args.complib is None:
            args.complib = "zlib"
        if args.fletcher32 is None:
            args.fletcher32 = False
        filters = Filters(complevel=args.complevel,
                          complib=args.complib,
                          shuffle=args.shuffle,
                          bitshuffle=args.bitshuffle,
                          fletcher32=args.fletcher32)

    # The start, stop and step params:
    start, stop, step = None, None, 1  # Defaults
    if args.rng:
        start, stop, step = args.rng.start, args.rng.stop, args.rng.step

    # Set globals
    verbose = args.verbose
    regoldindexes = args.regoldindexes
    createsysattrs = args.createsysattrs

    # Some timing
    t1 = time.time()
    cpu1 = cputime()
    # Copy the file
    if verbose:
        print("+=+" * 20)
        print("Recursive copy:", args.recursive)
        print("Applying filters:", filters)
        if args.sortby is not None:
            print("Sorting table(s) by column:", args.sortby)
            print("Forcing a CSI creation:", args.checkCSI)
        if args.propindexes:
            print("Recreating indexes in copied table(s)")
        print("Start copying %s:%s to %s:%s" %
              (srcfile, srcnode, dstfile, dstnode))
        print("+=+" * 20)

    # Check whether the specified source node is a group or a leaf.
    # The handle is closed even when the node lookup fails.
    h5srcfile = open_file(srcfile, 'r')
    try:
        srcnodeobject = h5srcfile.get_node(srcnode)
    finally:
        h5srcfile.close()

    stats = {'groups': 0, 'leaves': 0, 'links': 0, 'bytes': 0, 'hardlinks': 0}
    if isinstance(srcnodeobject, Group):
        copy_children(srcfile,
                      dstfile,
                      srcnode,
                      dstnode,
                      title=args.title,
                      recursive=args.recursive,
                      filters=filters,
                      copyuserattrs=args.copyuserattrs,
                      overwritefile=args.overwritefile,
                      overwrtnodes=args.overwrtnodes,
                      stats=stats,
                      start=start,
                      stop=stop,
                      step=step,
                      chunkshape=args.chunkshape,
                      sortby=args.sortby,
                      check_CSI=args.checkCSI,
                      propindexes=args.propindexes,
                      upgradeflavors=args.upgradeflavors,
                      use_hardlinks=True)
    else:
        # If not a Group, it should be a Leaf
        copy_leaf(srcfile,
                  dstfile,
                  srcnode,
                  dstnode,
                  title=args.title,
                  filters=filters,
                  copyuserattrs=args.copyuserattrs,
                  overwritefile=args.overwritefile,
                  overwrtnodes=args.overwrtnodes,
                  stats=stats,
                  start=start,
                  stop=stop,
                  step=step,
                  chunkshape=args.chunkshape,
                  sortby=args.sortby,
                  check_CSI=args.checkCSI,
                  propindexes=args.propindexes,
                  upgradeflavors=args.upgradeflavors)

    # Gather some statistics
    t2 = time.time()
    cpu2 = cputime()
    tcopy = round(t2 - t1, 3)
    cpucopy = round(cpu2 - cpu1, 3)
    try:
        tpercent = int(round(cpucopy / tcopy, 2) * 100)
    except ZeroDivisionError:
        tpercent = 'NaN'

    if verbose:
        ngroups = stats['groups']
        nleaves = stats['leaves']
        nlinks = stats['links']
        nhardlinks = stats['hardlinks']
        nbytescopied = stats['bytes']
        nnodes = ngroups + nleaves + nlinks + nhardlinks

        print(
            "Groups copied:",
            ngroups,
            ", Leaves copied:",
            nleaves,
            ", Links copied:",
            nlinks,
            ", Hard links copied:",
            nhardlinks,
        )
        if args.copyuserattrs:
            print("User attrs copied")
        else:
            print("User attrs not copied")
        print("KBytes copied:", round(nbytescopied / 1024., 3))
        print("Time copying: %s s (real) %s s (cpu)  %s%%" %
              (tcopy, cpucopy, tpercent))
        # A very fast copy can round to 0 seconds; report 'NaN' for the
        # rates (as done for the percentage) instead of dividing by zero.
        if tcopy:
            nodes_rate = round((nnodes) / float(tcopy), 1)
            kb_rate = int(nbytescopied / (tcopy * 1024))
        else:
            nodes_rate = kb_rate = 'NaN'
        print("Copied nodes/sec: ", nodes_rate)
        print("Copied KB/s :", kb_rate)
Example #5
0
def copy_children(srcfile,
                  dstfile,
                  srcgroup,
                  dstgroup,
                  title,
                  recursive,
                  filters,
                  copyuserattrs,
                  overwritefile,
                  overwrtnodes,
                  stats,
                  start,
                  stop,
                  step,
                  chunkshape,
                  sortby,
                  check_CSI,
                  propindexes,
                  upgradeflavors,
                  use_hardlinks=True):
    """Copy the children from source group to destination group.

    The source file is opened with ``srcgroup`` as its ``root_uep``, so the
    group to copy is seen as the root of the source handle.  When ``dstfile``
    exists and ``overwritefile`` is false it is opened in append mode,
    otherwise it is (re)created.  A missing destination group is created
    through ``newdst_group``; the source group's attributes are copied onto
    it only when it was newly created and ``copyuserattrs`` is true.  The
    remaining parameters are forwarded to ``_f_copy_children``.

    Both files are closed before returning, whether the copy succeeds or
    fails.

    Raises
    ------
    RuntimeError
        If the destination path exists but is not a group and
        ``overwrtnodes`` is false, or if copying the children fails.
    """
    # Open the source file with srcgroup as root_uep
    srcfileh = open_file(srcfile, 'r', root_uep=srcgroup)
    dstfileh = None
    try:
        #  Assign the root to srcgroup
        srcgroup = srcfileh.root

        created_dstgroup = False
        # Check whether the destination group exists or not
        if os.path.isfile(dstfile) and not overwritefile:
            dstfileh = open_file(dstfile, 'a',
                                 pytables_sys_attrs=createsysattrs)
            try:
                dstgroup = dstfileh.get_node(dstgroup)
            except NoSuchNodeError:
                # The dstgroup does not seem to exist. Try creating it.
                dstgroup = newdst_group(dstfileh, dstgroup, title, filters)
                created_dstgroup = True
            else:
                # The node exists, but it is really a group?
                if not isinstance(dstgroup, Group):
                    # No. Should we overwrite it?
                    if overwrtnodes:
                        parent = dstgroup._v_parent
                        last_slash = dstgroup._v_pathname.rindex('/')
                        dstgroupname = dstgroup._v_pathname[last_slash + 1:]
                        dstgroup.remove()
                        dstgroup = dstfileh.create_group(parent,
                                                         dstgroupname,
                                                         title=title,
                                                         filters=filters)
                    else:
                        raise RuntimeError(
                            "Please check that the node names are "
                            "not duplicated in destination, and "
                            "if so, add the --overwrite-nodes "
                            "flag if desired.")
        else:
            # The destination file does not exist or will be overwritten.
            dstfileh = open_file(dstfile,
                                 'w',
                                 title=title,
                                 filters=filters,
                                 pytables_sys_attrs=createsysattrs)
            dstgroup = newdst_group(dstfileh, dstgroup, title="",
                                    filters=filters)
            created_dstgroup = True

        # Copy the attributes to dstgroup, if needed
        if created_dstgroup and copyuserattrs:
            srcgroup._v_attrs._f_copy(dstgroup)

        # Finally, copy srcgroup children to dstgroup
        try:
            srcgroup._f_copy_children(dstgroup,
                                      recursive=recursive,
                                      filters=filters,
                                      copyuserattrs=copyuserattrs,
                                      overwrite=overwrtnodes,
                                      stats=stats,
                                      start=start,
                                      stop=stop,
                                      step=step,
                                      chunkshape=chunkshape,
                                      sortby=sortby,
                                      check_CSI=check_CSI,
                                      propindexes=propindexes,
                                      use_hardlinks=use_hardlinks)
        except Exception as exc:
            # Exception rather than a bare except, so that
            # KeyboardInterrupt/SystemExit are not turned into RuntimeError.
            print("Problems doing the copy from '%s:%s' to '%s:%s'" %
                  (srcfile, srcgroup, dstfile, dstgroup))
            print("The error was --> %s: %s" % (type(exc), exc))
            print("The destination file looks like:\n", dstfileh)
            # The files are closed by the outer ``finally`` clause.
            raise RuntimeError("Please check that the node names are not "
                               "duplicated in destination, and if so, add "
                               "the --overwrite-nodes flag if desired. In "
                               "particular, pay attention that root_uep is "
                               "not fooling you.")

        # Upgrade flavors in dstnode, if required
        if upgradeflavors:
            for dstnode in dstgroup._f_walknodes("Leaf"):
                if srcfileh.format_version.startswith("1"):
                    # Remove original flavor in case the source file has 1.x
                    # format
                    dstnode.del_attr('FLAVOR')
                elif srcfileh.format_version < "2.1":
                    if dstnode.get_attr('FLAVOR') in numpy_aliases:
                        dstnode.set_attr('FLAVOR', internal_flavor)

        # Convert the remaining tables with old indexes (if any)
        for table in srcgroup._f_walknodes("Table"):
            dsttable = dstfileh.get_node(dstgroup, table._v_pathname)
            recreate_indexes(table, dstfileh, dsttable)
    finally:
        # Close all the open files, on success and on failure alike
        try:
            srcfileh.close()
        finally:
            if dstfileh is not None:
                dstfileh.close()
Example #6
0
def copy_leaf(srcfile, dstfile, srcnode, dstnode, title,
              filters, copyuserattrs, overwritefile, overwrtnodes, stats,
              start, stop, step, chunkshape, sortby, check_CSI,
              propindexes, upgradeflavors):
    """Copy a single leaf from ``srcfile`` into ``dstfile``.

    ``srcnode`` is the path of the leaf inside the source file and
    ``dstnode`` the destination path.  A trailing slash in ``dstnode`` names
    the destination group only; the copy then keeps the source leaf's name.
    When ``dstfile`` exists and ``overwritefile`` is false it is opened in
    append mode, otherwise it is (re)created.  A missing destination group
    is created through ``newdst_group``.  The remaining parameters are
    forwarded unchanged to ``srcnode.copy``.

    Both files are closed before returning, whether the copy succeeds or
    fails.

    Raises
    ------
    RuntimeError
        If the destination group path exists but is not a group and
        ``overwrtnodes`` is false, or if the copy itself fails.
    """
    # Open the source file
    srcfileh = open_file(srcfile, 'r')
    dstfileh = None
    try:
        # Get the source node (that should exist)
        srcnode = srcfileh.get_node(srcnode)

        # Get the destination node and its parent
        last_slash = dstnode.rindex('/')
        if last_slash == len(dstnode)-1:
            # Trailing slash: the whole path names the destination group
            dstgroup = dstnode[:-1]
        elif last_slash > 0:
            dstgroup = dstnode[:last_slash]
        else:
            dstgroup = "/"
        dstleaf = dstnode[last_slash + 1:]
        if dstleaf == "":
            dstleaf = srcnode.name

        # Check whether the destination group exists or not
        if os.path.isfile(dstfile) and not overwritefile:
            dstfileh = open_file(dstfile, 'a',
                                 pytables_sys_attrs=createsysattrs)
            try:
                dstgroup = dstfileh.get_node(dstgroup)
            except Exception:
                # The dstgroup does not seem to exist. Try creating it.
                # (Exception rather than a bare except, so that
                # KeyboardInterrupt/SystemExit are not swallowed here.)
                dstgroup = newdst_group(dstfileh, dstgroup, title, filters)
            else:
                # The node exists, but it is really a group?
                if not isinstance(dstgroup, Group):
                    # No. Should we overwrite it?
                    if overwrtnodes:
                        parent = dstgroup._v_parent
                        last_slash = dstgroup._v_pathname.rindex('/')
                        dstgroupname = dstgroup._v_pathname[last_slash + 1:]
                        dstgroup.remove()
                        dstgroup = dstfileh.create_group(
                            parent, dstgroupname,
                            title=title, filters=filters)
                    else:
                        raise RuntimeError(
                            "Please check that the node names are "
                            "not duplicated in destination, and "
                            "if so, add the --overwrite-nodes "
                            "flag if desired.")
        else:
            # The destination file does not exist or will be overwritten.
            dstfileh = open_file(dstfile, 'w', title=title, filters=filters,
                                 pytables_sys_attrs=createsysattrs)
            dstgroup = newdst_group(dstfileh, dstgroup, title="",
                                    filters=filters)

        # Finally, copy srcnode to dstnode
        try:
            dstnode = srcnode.copy(
                dstgroup, dstleaf, filters=filters,
                copyuserattrs=copyuserattrs, overwrite=overwrtnodes,
                stats=stats, start=start, stop=stop, step=step,
                chunkshape=chunkshape,
                sortby=sortby, check_CSI=check_CSI, propindexes=propindexes)
        except Exception as exc:
            print("Problems doing the copy from '%s:%s' to '%s:%s'" %
                  (srcfile, srcnode, dstfile, dstnode))
            print("The error was --> %s: %s" % (type(exc), exc))
            print("The destination file looks like:\n", dstfileh)
            # The files are closed by the outer ``finally`` clause.
            raise RuntimeError("Please check that the node names are not "
                               "duplicated in destination, and if so, add "
                               "the --overwrite-nodes flag if desired.")

        # Upgrade flavors in dstnode, if required
        if upgradeflavors:
            if srcfileh.format_version.startswith("1"):
                # Remove original flavor in case the source file has 1.x
                # format
                dstnode.del_attr('FLAVOR')
            elif srcfileh.format_version < "2.1":
                if dstnode.get_attr('FLAVOR') in numpy_aliases:
                    dstnode.set_attr('FLAVOR', internal_flavor)

        # Recreate possible old indexes in destination node
        if srcnode._c_classid == "TABLE":
            recreate_indexes(srcnode, dstfileh, dstnode)
    finally:
        # Close all the open files, on success and on failure alike
        try:
            srcfileh.close()
        finally:
            if dstfileh is not None:
                dstfileh.close()
Example #7
0
def copy_children(srcfile, dstfile, srcgroup, dstgroup, title,
                  recursive, filters, copyuserattrs, overwritefile,
                  overwrtnodes, stats, start, stop, step,
                  chunkshape, sortby, check_CSI, propindexes,
                  upgradeflavors, use_hardlinks=True):
    """Copy the children from source group to destination group.

    The source file is opened with ``srcgroup`` as its ``root_uep``, so the
    group to copy is seen as the root of the source handle.  When ``dstfile``
    exists and ``overwritefile`` is false it is opened in append mode,
    otherwise it is (re)created.  A missing destination group is created
    through ``newdst_group``; the source group's attributes are copied onto
    it only when it was newly created and ``copyuserattrs`` is true.  The
    remaining parameters are forwarded to ``_f_copy_children``.

    Both files are closed before returning, whether the copy succeeds or
    fails.

    Raises
    ------
    RuntimeError
        If the destination path exists but is not a group and
        ``overwrtnodes`` is false, or if copying the children fails.
    """
    # Open the source file with srcgroup as root_uep
    srcfileh = open_file(srcfile, 'r', root_uep=srcgroup)
    dstfileh = None
    try:
        #  Assign the root to srcgroup
        srcgroup = srcfileh.root

        created_dstgroup = False
        # Check whether the destination group exists or not
        if os.path.isfile(dstfile) and not overwritefile:
            dstfileh = open_file(dstfile, 'a',
                                 pytables_sys_attrs=createsysattrs)
            try:
                dstgroup = dstfileh.get_node(dstgroup)
            except NoSuchNodeError:
                # The dstgroup does not seem to exist. Try creating it.
                dstgroup = newdst_group(dstfileh, dstgroup, title, filters)
                created_dstgroup = True
            else:
                # The node exists, but it is really a group?
                if not isinstance(dstgroup, Group):
                    # No. Should we overwrite it?
                    if overwrtnodes:
                        parent = dstgroup._v_parent
                        last_slash = dstgroup._v_pathname.rindex('/')
                        dstgroupname = dstgroup._v_pathname[last_slash + 1:]
                        dstgroup.remove()
                        dstgroup = dstfileh.create_group(
                            parent, dstgroupname,
                            title=title, filters=filters)
                    else:
                        raise RuntimeError(
                            "Please check that the node names are "
                            "not duplicated in destination, and "
                            "if so, add the --overwrite-nodes "
                            "flag if desired.")
        else:
            # The destination file does not exist or will be overwritten.
            dstfileh = open_file(dstfile, 'w', title=title, filters=filters,
                                 pytables_sys_attrs=createsysattrs)
            dstgroup = newdst_group(dstfileh, dstgroup, title="",
                                    filters=filters)
            created_dstgroup = True

        # Copy the attributes to dstgroup, if needed
        if created_dstgroup and copyuserattrs:
            srcgroup._v_attrs._f_copy(dstgroup)

        # Finally, copy srcgroup children to dstgroup
        try:
            srcgroup._f_copy_children(
                dstgroup, recursive=recursive, filters=filters,
                copyuserattrs=copyuserattrs, overwrite=overwrtnodes,
                stats=stats, start=start, stop=stop, step=step,
                chunkshape=chunkshape,
                sortby=sortby, check_CSI=check_CSI, propindexes=propindexes,
                use_hardlinks=use_hardlinks)
        except Exception as exc:
            # Exception rather than a bare except, so that
            # KeyboardInterrupt/SystemExit are not turned into RuntimeError.
            print("Problems doing the copy from '%s:%s' to '%s:%s'" %
                  (srcfile, srcgroup, dstfile, dstgroup))
            print("The error was --> %s: %s" % (type(exc), exc))
            print("The destination file looks like:\n", dstfileh)
            # The files are closed by the outer ``finally`` clause.
            raise RuntimeError("Please check that the node names are not "
                               "duplicated in destination, and if so, add "
                               "the --overwrite-nodes flag if desired. In "
                               "particular, pay attention that root_uep is "
                               "not fooling you.")

        # Upgrade flavors in dstnode, if required
        if upgradeflavors:
            for dstnode in dstgroup._f_walknodes("Leaf"):
                if srcfileh.format_version.startswith("1"):
                    # Remove original flavor in case the source file has 1.x
                    # format
                    dstnode.del_attr('FLAVOR')
                elif srcfileh.format_version < "2.1":
                    if dstnode.get_attr('FLAVOR') in numpy_aliases:
                        dstnode.set_attr('FLAVOR', internal_flavor)

        # Convert the remaining tables with old indexes (if any)
        for table in srcgroup._f_walknodes("Table"):
            dsttable = dstfileh.get_node(dstgroup, table._v_pathname)
            recreate_indexes(table, dstfileh, dsttable)
    finally:
        # Close all the open files, on success and on failure alike
        try:
            srcfileh.close()
        finally:
            if dstfileh is not None:
                dstfileh.close()
Example #8
0
def main():
    """Command-line entry point: copy a node from one file to another.

    Parses the command line, validates the options, builds the ``Filters``
    instance (or ``None``), publishes ``verbose``, ``regoldindexes`` and
    ``createsysattrs`` as module globals, and then delegates to
    ``copy_children`` when the source node is a group or to ``copy_leaf``
    otherwise.  Timing and copy statistics are printed in verbose mode.

    Source and destination are given as ``filename[:nodepath]``; a missing
    or empty node path means the root node (``"/"``).
    """
    global verbose
    global regoldindexes
    global createsysattrs

    parser = _get_parser()
    args = parser.parse_args()

    # check arguments
    if args.rng:
        try:
            # NOTE(review): eval() executes arbitrary expressions taken from
            # the command line; acceptable only while the caller is trusted.
            args.rng = eval("slice(" + args.rng + ")")
        except Exception:
            parser.error("Error when getting the range parameter.")

    if args.chunkshape.isdigit() or args.chunkshape.startswith('('):
        # NOTE(review): same eval() caveat as for the range above.
        args.chunkshape = eval(args.chunkshape)

    # ``None`` (option not given) is handled further below, so only range
    # check real values; comparing None with an int fails on Python 3.
    if args.complevel is not None and not 0 <= args.complevel <= 9:
        parser.error(
            'invalid "complevel" value, it should be in the range [0, 9]'
        )

    # Catch the files passed as the last arguments
    src = args.src.rsplit(':', 1)
    dst = args.dst.rsplit(':', 1)
    if len(src) == 1:
        srcfile, srcnode = src[0], "/"
    else:
        srcfile, srcnode = src
    if len(dst) == 1:
        dstfile, dstnode = dst[0], "/"
    else:
        dstfile, dstnode = dst

    if srcnode == "":
        # case where filename == "filename:" instead of "filename:/"
        srcnode = "/"

    if dstnode == "":
        # case where filename == "filename:" instead of "filename:/"
        dstnode = "/"

    # Ignore the warnings for tables that contains oldindexes
    # (these will be handled by the copying routines)
    warnings.filterwarnings("ignore", category=OldIndexWarning)

    # Ignore the flavors warnings during upgrading flavor operations
    if args.upgradeflavors:
        warnings.filterwarnings("ignore", category=FlavorWarning)

    # Build the Filters instance
    filter_params = (
        args.complevel,
        args.complib,
        args.shuffle,
        args.bitshuffle,
        args.fletcher32,
    )
    # Compare against a tuple of the SAME length: with five parameters the
    # previous ``(None,) * 4`` could never be equal.
    if filter_params == (None,) * len(filter_params) or args.keepfilters:
        filters = None
    else:
        if args.complevel is None:
            args.complevel = 0
        if args.shuffle is None:
            # Shuffle defaults to on only when compression is active
            args.shuffle = args.complevel > 0
        if args.bitshuffle is None:
            args.bitshuffle = False
        if args.bitshuffle:
            # Shuffle and bitshuffle are mutually exclusive
            args.shuffle = False
        if args.complib is None:
            args.complib = "zlib"
        if args.fletcher32 is None:
            args.fletcher32 = False
        filters = Filters(complevel=args.complevel, complib=args.complib,
                          shuffle=args.shuffle, bitshuffle=args.bitshuffle,
                          fletcher32=args.fletcher32)

    # The start, stop and step params:
    start, stop, step = None, None, 1  # Defaults
    if args.rng:
        start, stop, step = args.rng.start, args.rng.stop, args.rng.step

    # Set globals
    verbose = args.verbose
    regoldindexes = args.regoldindexes
    createsysattrs = args.createsysattrs

    # time.clock() was removed in Python 3.8; prefer time.process_time()
    # and only fall back to clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock

    # Some timing
    t1 = time.time()
    cpu1 = cpu_clock()
    # Copy the file
    if verbose:
        print("+=+" * 20)
        print("Recursive copy:", args.recursive)
        print("Applying filters:", filters)
        if args.sortby is not None:
            print("Sorting table(s) by column:", args.sortby)
            print("Forcing a CSI creation:", args.checkCSI)
        if args.propindexes:
            print("Recreating indexes in copied table(s)")
        print("Start copying %s:%s to %s:%s" % (srcfile, srcnode,
                                                dstfile, dstnode))
        print("+=+" * 20)

    # Check whether the specified source node is a group or a leaf.
    # The handle is closed even when the node lookup fails.
    h5srcfile = open_file(srcfile, 'r')
    try:
        srcnodeobject = h5srcfile.get_node(srcnode)
    finally:
        h5srcfile.close()

    stats = {'groups': 0, 'leaves': 0, 'links': 0, 'bytes': 0, 'hardlinks': 0}
    if isinstance(srcnodeobject, Group):
        copy_children(
            srcfile, dstfile, srcnode, dstnode,
            title=args.title, recursive=args.recursive, filters=filters,
            copyuserattrs=args.copyuserattrs, overwritefile=args.overwritefile,
            overwrtnodes=args.overwrtnodes, stats=stats,
            start=start, stop=stop, step=step, chunkshape=args.chunkshape,
            sortby=args.sortby, check_CSI=args.checkCSI,
            propindexes=args.propindexes,
            upgradeflavors=args.upgradeflavors,
            use_hardlinks=True)
    else:
        # If not a Group, it should be a Leaf
        copy_leaf(
            srcfile, dstfile, srcnode, dstnode,
            title=args.title, filters=filters,
            copyuserattrs=args.copyuserattrs,
            overwritefile=args.overwritefile, overwrtnodes=args.overwrtnodes,
            stats=stats, start=start, stop=stop, step=step,
            chunkshape=args.chunkshape,
            sortby=args.sortby, check_CSI=args.checkCSI,
            propindexes=args.propindexes,
            upgradeflavors=args.upgradeflavors)

    # Gather some statistics
    t2 = time.time()
    cpu2 = cpu_clock()
    tcopy = round(t2 - t1, 3)
    cpucopy = round(cpu2 - cpu1, 3)
    try:
        tpercent = int(round(cpucopy / tcopy, 2) * 100)
    except ZeroDivisionError:
        tpercent = 'NaN'

    if verbose:
        ngroups = stats['groups']
        nleaves = stats['leaves']
        nlinks = stats['links']
        nhardlinks = stats['hardlinks']
        nbytescopied = stats['bytes']
        nnodes = ngroups + nleaves + nlinks + nhardlinks

        print(
            "Groups copied:", ngroups,
            ", Leaves copied:", nleaves,
            ", Links copied:", nlinks,
            ", Hard links copied:", nhardlinks,
        )
        if args.copyuserattrs:
            print("User attrs copied")
        else:
            print("User attrs not copied")
        print("KBytes copied:", round(nbytescopied / 1024., 3))
        print("Time copying: %s s (real) %s s (cpu)  %s%%" % (
            tcopy, cpucopy, tpercent))
        # A very fast copy can round to 0 seconds; report 'NaN' for the
        # rates (as done for the percentage) instead of dividing by zero.
        if tcopy:
            nodes_rate = round((nnodes) / float(tcopy), 1)
            kb_rate = int(nbytescopied / (tcopy * 1024))
        else:
            nodes_rate = kb_rate = 'NaN'
        print("Copied nodes/sec: ", nodes_rate)
        print("Copied KB/s :", kb_rate)
Example #9
0
def main():
    """Command-line entry point: dump the contents of one node of a file.

    Parses ``sys.argv`` with getopt, records the selected flags on the
    module-level ``options`` object, opens ``file[:nodepath]`` read-only and
    passes the selected node to ``dump_group`` or ``dump_leaf``.

    Exits with status 0 after ``-h`` and with status 2 on a usage error.
    The file is always closed, even when dumping fails.
    """
    usage = \
        """usage: %s [-d] [-v] [-a] [-c] [-i] [-R start,stop,step] [-h] file[:nodepath]
      -d -- Dump data information on leaves
      -v -- Dump more metainformation on nodes
      -a -- Show attributes in nodes (only useful when -v or -d are active)
      -c -- Show info of columns in tables (only useful when -v or -d are active)
      -i -- Show info of indexed columns (only useful when -v or -d are active)
      -R RANGE -- Select a RANGE of rows in the form "start,stop,step"
      -h -- Print help on usage
                \n""" \
    % os.path.basename(sys.argv[0])

    def usage_error(message=None):
        # Report a command-line mistake and stop with a failing exit status.
        if message is not None:
            print(message)
        sys.stderr.write(usage)
        sys.exit(2)

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'R:ahdvci')
    except getopt.GetoptError:
        usage_error()

    # Exactly one file[:nodepath] argument is expected
    if len(pargs) != 1:
        usage_error()

    # Get the options
    for flag, value in opts:
        if flag == '-R':
            options.dump = 1
            try:
                # NOTE(review): eval() executes arbitrary code taken from the
                # command line; acceptable only because the caller is the
                # local user.  Consider parsing the three fields explicitly.
                options.rng = eval("slice(" + value + ")")
            except Exception as err:
                usage_error("Error when getting the range parameter.\n"
                            "  The error was: %s" % (err,))
        elif flag == '-a':
            options.showattrs = 1
        elif flag == '-h':
            # Asking for help is not an error: exit successfully
            sys.stderr.write(usage)
            sys.exit(0)
        elif flag == '-v':
            options.verbose = 1
        elif flag == '-d':
            options.dump = 1
        elif flag == '-c':
            options.colinfo = 1
        elif flag == '-i':
            options.idxinfo = 1
        else:
            usage_error("%s : Unrecognized option" % (flag,))

    # Split on the LAST colon only, so that a colon inside the file name
    # does not break the two-way unpacking below.
    src = pargs[0].rsplit(':', 1)
    if len(src) == 1:
        filename, nodename = src[0], "/"
    else:
        filename, nodename = src
        if nodename == "":
            # case where filename == "filename:" instead of "filename:/"
            nodename = "/"

    h5file = open_file(filename, 'r')
    try:
        # Check whether the specified node is a group or a leaf
        nodeobject = h5file.get_node(nodename)
        if isinstance(nodeobject, Group):
            dump_group(nodeobject)
        elif isinstance(nodeobject, Leaf):
            # If it is not a Group, it must be a Leaf
            dump_leaf(nodeobject)
        else:
            # This should never happen
            print("Unrecognized object: %s" % (nodeobject,))
    finally:
        # Close the file even if the node lookup or the dump raised
        h5file.close()
Example #10
0
    def test_pytables_dummytable(self):
        '''Test some pytables casting and selection behaviour that is NOT
        clearly documented :( '''

        with open_file(self.output_file, 'a') as h5file:
            table = h5file.create_table("/",
                                        'table',
                                        description=DummyTable)
            row = table.row

            # define small accessors to avoid pylint false positives
            # everywhere (named so that they do not shadow builtins):
            def set_field(field, val):
                '''sets a value on the row'''
                row[field] = val  # pylint: disable=unsupported-assignment-operation

            def get_field(field):
                '''gets a value from the row'''
                return row[field]  # pylint: disable=unsubscriptable-object

            # assert the value is the default:
            self.assertTrue(np.isnan(get_field('floatcol')))
            # what if we supply a string? TypeError
            with self.assertRaises(TypeError):
                set_field('floatcol', 'a')
            # same if string empty: TypeError
            with self.assertRaises(TypeError):
                set_field('floatcol', '')
            # assert the value is still the default:
            self.assertTrue(np.isnan(get_field('floatcol')))
            # what if we supply a castable string instead? it is cast
            set_field('floatcol', '5.5')
            self.assertEqual(get_field('floatcol'), 5.5)
            # what if we supply a castable string instead WITH SPACES? cast
            set_field('floatcol', '1.0 ')
            self.assertEqual(get_field('floatcol'), 1.0)
            # what if we supply a scalar instead of an array?
            # the value is broadcast:
            set_field('arraycol', 5)
            self.assertTrue(np.allclose([5] * 10, get_field('arraycol')))
            # what if arraycol string?
            set_field('arraycol', '5')
            self.assertTrue(np.allclose([5] * 10, get_field('arraycol')))
            # what if arraycol array of strings?
            set_field('arraycol', ['5'] * 10)
            self.assertTrue(np.allclose([5] * 10, get_field('arraycol')))
            # what if arraycol array of strings with one non-numeric item?
            strvals = ['5'] * 10
            strvals[3] = 'asd'
            with self.assertRaises(ValueError):
                set_field('arraycol', strvals)
            # what if we supply a float out of bound? no error
            # but value is saved differently!
            maxfloat32 = 3.4028235e+38
            val = maxfloat32 * 10
            set_field('floatcol', val)
            val2 = get_field('floatcol')
            # assert they are not the same
            self.assertFalse(np.isclose(val2, val))
            # now restore val to the max Float32, and assert they are the same:
            val = maxfloat32
            set_field('floatcol', val)
            val2 = get_field('floatcol')
            self.assertTrue(np.isclose(val2, val))
            # write to the table nan and see if we can select it later:
            set_field('floatcol', float('nan'))
            set_field('arraycol', [1, 2, 3, 4, float('nan'), 6.6, 7.7, 8.8,
                                   9.9, 10.00045])

            # setting ascii str in stringcol is safe, we do not need to convert
            set_field('stringcol', "abc")
            # However, returned value is bytes
            self.assertEqual(get_field('stringcol'), b'abc')

            # test WHY enumcol IS USELESS:
            # (reading the column must not raise; the value itself is unused)
            get_field('ecol')
            # here is the point about enumcols: WE CANNOT SET the LABEL!
            # What's the point of having an enum if we actually need to
            # set/get the associated int?
            with self.assertRaises(Exception):
                set_field('ecol', 'a')
            set_field('ecol', -5)
            self.assertEqual(get_field('ecol'), -5)

            # test time64 col:
            tme = get_field('tcol')
            self.assertTrue(np.isnan(tme))
            dtime = datetime.utcnow()
            with self.assertRaises(TypeError):
                set_field('tcol', dtime)
            tme = get_field('tcol')
            self.assertTrue(np.isnan(tme))
            # now set a numpy datetime64: this raises a TypeError too
            tme = np.datetime64('2007-02-01T00:01:04')
            with self.assertRaises(TypeError):
                set_field('tcol', tme)
            # OK conclusion: pytables TimeCol is absolutely USELESS

            row.append()  # pylint: disable=no-member
            table.flush()

        # test selections:

        with open_file(self.output_file, 'a') as h5file:
            tbl = h5file.get_node("/", 'table')
            ###########################
            # TEST NAN SELECTION:
            ###########################
            # this does not work:
            with self.assertRaises(NameError):
                [r['floatcol'] for r in tbl.where('floatcol == nan')]
            # what if we provide the nan as string?
            # incompatible types (NotImplementedError)
            with self.assertRaises(NotImplementedError):
                [r['floatcol'] for r in tbl.where("floatcol == 'nan'")]
            # we should use the condvars dict:
            vals = [r['floatcol'] for r in
                    tbl.where('floatcol == nan',
                              condvars={'nan': float('nan')})]
            # does not raise, but we did not get what we wanted:
            self.assertFalse(vals)
            # we should actually test nan equality with this weird
            # test: (https://stackoverflow.com/a/10821267)
            vals = [r['floatcol'] for r in tbl.where('floatcol != floatcol')]
            # now it works:
            self.assertEqual(len(vals), 1)

            ###########################
            # TEST ARRAY SELECTION:
            ###########################
            # this does not work. Array selection not yet supported:
            with self.assertRaises(NotImplementedError):
                vals = [r['arraycol'] for r in
                        tbl.where('arraycol == 2')]

            ###########################
            # TEST STRING SELECTION:
            ###########################
            # this does not work, needs quotes:
            with self.assertRaises(NameError):
                vals = [r['stringcol'] for r in
                        tbl.where("stringcol == %s" % 'abc')]
            # this SHOULD NOT WORK either (not binary strings), BUT IT DOES:
            vals = [r['stringcol'] for r in
                    tbl.where("stringcol == 'abc'")]
            self.assertEqual(len(vals), 1)
Example #11
0
def main():
    """Command-line entry point: copy (repack) nodes between HDF5 files.

    Parses ``sys.argv`` with getopt, builds the optional ``Filters``
    instance and the row range, then copies ``sourcefile:sourcegroup`` into
    ``destfile:destgroup`` using ``copy_children`` when the source node is a
    Group and ``copy_leaf`` otherwise.  With ``-v`` the settings and some
    timing statistics are printed.

    The module-level ``verbose``, ``regoldindexes`` and ``createsysattrs``
    flags are updated from the command line.

    Exits with status 0 after ``-h`` and with status 2 on a usage error.
    """
    global verbose
    global regoldindexes
    global createsysattrs

    usage = """usage: %s [-h] [-v] [-o] [-R start,stop,step] [--non-recursive] [--dest-title=title] [--dont-create-sysattrs] [--dont-copy-userattrs] [--overwrite-nodes] [--complevel=(0-9)] [--complib=lib] [--shuffle=(0|1)] [--fletcher32=(0|1)] [--keep-source-filters] [--chunkshape=value] [--upgrade-flavors] [--dont-regenerate-old-indexes] [--sortby=column] [--checkCSI] [--propindexes] sourcefile:sourcegroup destfile:destgroup
     -h -- Print usage message.
     -v -- Show more information.
     -o -- Overwrite destination file.
     -R RANGE -- Select a RANGE of rows (in the form "start,stop,step")
         during the copy of *all* the leaves.  Default values are
         "None,None,1", which means a copy of all the rows.
     --non-recursive -- Do not do a recursive copy. Default is to do it.
     --dest-title=title -- Title for the new file (if not specified,
         the source is copied).
     --dont-create-sysattrs -- Do not create sys attrs (default is to do it).
     --dont-copy-userattrs -- Do not copy the user attrs (default is to do it).
     --overwrite-nodes -- Overwrite destination nodes if they exist. Default is
         to not overwrite them.
     --complevel=(0-9) -- Set a compression level (0 for no compression, which
         is the default).
     --complib=lib -- Set the compression library to be used during the copy.
         lib can be set to "zlib", "lzo", "bzip2" or "blosc".  Defaults to
         "zlib".
     --shuffle=(0|1) -- Activate or not the shuffling filter (default is active
         if complevel>0).
     --fletcher32=(0|1) -- Whether to activate or not the fletcher32 filter
        (not active by default).
     --keep-source-filters -- Use the original filters in source files. The
         default is not doing that if any of --complevel, --complib, --shuffle
         or --fletcher32 option is specified.
     --chunkshape=("keep"|"auto"|int|tuple) -- Set a chunkshape.  A value
         of "auto" computes a sensible value for the chunkshape of the
         leaves copied.  The default is to "keep" the original value.
     --upgrade-flavors -- When repacking PyTables 1.x files, the flavor of
         leaves will be unset. With this, such a leaves will be serialized
         as objects with the internal flavor ('numpy' for 2.x series).
     --dont-regenerate-old-indexes -- Disable regenerating old indexes. The
         default is to regenerate old indexes as they are found.
     --sortby=column -- Do a table copy sorted by the index in "column".
         For reversing the order, use a negative value in the "step" part of
         "RANGE" (see "-R" flag).  Only applies to table objects.
     --checkCSI -- Force the check for a CSI index for the --sortby column.
     --propindexes -- Propagate the indexes existing in original tables.  The
         default is to not propagate them.  Only applies to table objects.
    \n""" % os.path.basename(sys.argv[0])

    def usage_error(message):
        # Report a command-line mistake and stop with a failing exit status.
        print(message)
        sys.stderr.write(usage)
        sys.exit(2)

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'hvoR:', [
            'non-recursive',
            'dest-title=',
            'dont-create-sysattrs',
            'dont-copy-userattrs',
            'overwrite-nodes',
            'complevel=',
            'complib=',
            'shuffle=',
            'fletcher32=',
            'keep-source-filters',
            'chunkshape=',
            'upgrade-flavors',
            'dont-regenerate-old-indexes',
            'sortby=',
            'checkCSI',
            'propindexes',
        ])
    except getopt.GetoptError as err:
        usage_error("Error parsing the options. The error was: %s" % (err,))

    # default options
    overwritefile = False
    keepfilters = False
    chunkshape = "keep"
    complevel = None
    complib = None
    shuffle = None
    fletcher32 = None
    title = ""
    copyuserattrs = True
    rng = None
    recursive = True
    overwrtnodes = False
    upgradeflavors = False
    sortby = None
    checkCSI = False
    propindexes = False

    # Get the options
    for name, value in opts:
        if name == '-h':
            # Asking for help is not an error: exit successfully
            sys.stderr.write(usage)
            sys.exit(0)
        elif name == '-v':
            verbose = True
        elif name == '-o':
            overwritefile = True
        elif name == '-R':
            try:
                # NOTE(review): eval() executes arbitrary code taken from the
                # command line; acceptable only because the caller is the
                # local user.  Consider parsing the three fields explicitly.
                rng = eval("slice(" + value + ")")
            except Exception as err:
                usage_error("Error when getting the range parameter.\n"
                            "  The error was: %s" % (err,))
        elif name == '--dest-title':
            title = value
        elif name == '--dont-create-sysattrs':
            createsysattrs = False
        elif name == '--dont-copy-userattrs':
            copyuserattrs = False
        elif name == '--non-recursive':
            recursive = False
        elif name == '--overwrite-nodes':
            overwrtnodes = True
        elif name == '--keep-source-filters':
            keepfilters = True
        elif name == '--chunkshape':
            chunkshape = value
            if chunkshape.isdigit() or chunkshape.startswith('('):
                # NOTE(review): eval() on a command-line string, as for -R
                chunkshape = eval(chunkshape)
        elif name == '--upgrade-flavors':
            upgradeflavors = True
        elif name == '--dont-regenerate-old-indexes':
            regoldindexes = False
        elif name == '--complevel':
            complevel = int(value)
        elif name == '--complib':
            complib = value
        elif name == '--shuffle':
            shuffle = int(value)
        elif name == '--fletcher32':
            fletcher32 = int(value)
        elif name == '--sortby':
            sortby = value
        elif name == '--propindexes':
            propindexes = True
        elif name == '--checkCSI':
            checkCSI = True
        else:
            usage_error("%s : Unrecognized option" % (name,))

    # if we pass a number of files different from 2, abort
    if len(pargs) != 2:
        usage_error("You need to pass both source and destination!.")

    # Catch the files passed as the last arguments.  Split on the LAST colon
    # only, so that a colon inside a file name does not break the unpacking.
    src = pargs[0].rsplit(':', 1)
    dst = pargs[1].rsplit(':', 1)
    if len(src) == 1:
        srcfile, srcnode = src[0], "/"
    else:
        srcfile, srcnode = src
    if len(dst) == 1:
        dstfile, dstnode = dst[0], "/"
    else:
        dstfile, dstnode = dst

    if srcnode == "":
        # case where filename == "filename:" instead of "filename:/"
        srcnode = "/"

    if dstnode == "":
        # case where filename == "filename:" instead of "filename:/"
        dstnode = "/"

    # Ignore the warnings for tables that contains oldindexes
    # (these will be handled by the copying routines)
    warnings.filterwarnings("ignore", category=OldIndexWarning)
    # Ignore the flavors warnings during upgrading flavor operations
    if upgradeflavors:
        warnings.filterwarnings("ignore", category=FlavorWarning)

    # Build the Filters instance
    if ((complevel, complib, shuffle, fletcher32) == (None, ) * 4
            or keepfilters):
        filters = None
    else:
        if complevel is None:
            complevel = 0
        if shuffle is None:
            # Shuffling defaults to active only when compressing
            shuffle = complevel > 0
        if complib is None:
            complib = "zlib"
        if fletcher32 is None:
            fletcher32 = False
        filters = Filters(complevel=complevel,
                          complib=complib,
                          shuffle=shuffle,
                          fletcher32=fletcher32)

    # The start, stop and step params:
    start, stop, step = None, None, 1  # Defaults
    if rng is not None:
        start, stop, step = rng.start, rng.stop, rng.step

    # time.clock() does not exist on Python >= 3.8; use process_time() when
    # the interpreter provides it and fall back to clock() otherwise.
    cpuclock = getattr(time, 'process_time', None) or time.clock

    # Some timing
    t1 = time.time()
    cpu1 = cpuclock()
    # Copy the file
    if verbose:
        print("+=+" * 20)
        print("Recursive copy: %s" % (recursive,))
        print("Applying filters: %s" % (filters,))
        if sortby is not None:
            print("Sorting table(s) by column: %s" % (sortby,))
            print("Forcing a CSI creation: %s" % (checkCSI,))
        if propindexes:
            print("Recreating indexes in copied table(s)")
        print("Start copying %s:%s to %s:%s" % (srcfile, srcnode, dstfile,
                                                dstnode))
        print("+=+" * 20)

    # Check whether the specified source node is a group or a leaf
    h5srcfile = open_file(srcfile, 'r')
    try:
        srcnodeobject = h5srcfile.get_node(srcnode)
    finally:
        # Close the file again, even if the node lookup failed
        h5srcfile.close()

    stats = {'groups': 0, 'leaves': 0, 'links': 0, 'bytes': 0}
    if isinstance(srcnodeobject, Group):
        copy_children(srcfile,
                      dstfile,
                      srcnode,
                      dstnode,
                      title=title,
                      recursive=recursive,
                      filters=filters,
                      copyuserattrs=copyuserattrs,
                      overwritefile=overwritefile,
                      overwrtnodes=overwrtnodes,
                      stats=stats,
                      start=start,
                      stop=stop,
                      step=step,
                      chunkshape=chunkshape,
                      sortby=sortby,
                      checkCSI=checkCSI,
                      propindexes=propindexes,
                      upgradeflavors=upgradeflavors)
    else:
        # If not a Group, it should be a Leaf
        copy_leaf(srcfile,
                  dstfile,
                  srcnode,
                  dstnode,
                  title=title,
                  filters=filters,
                  copyuserattrs=copyuserattrs,
                  overwritefile=overwritefile,
                  overwrtnodes=overwrtnodes,
                  stats=stats,
                  start=start,
                  stop=stop,
                  step=step,
                  chunkshape=chunkshape,
                  sortby=sortby,
                  checkCSI=checkCSI,
                  propindexes=propindexes,
                  upgradeflavors=upgradeflavors)

    # Gather some statistics
    t2 = time.time()
    cpu2 = cpuclock()
    tcopy = round(t2 - t1, 3)
    cpucopy = round(cpu2 - cpu1, 3)
    # A very fast copy can round to an elapsed time of zero
    try:
        tpercent = int(round(cpucopy / tcopy, 2) * 100)
    except ZeroDivisionError:
        tpercent = 'NaN'

    if verbose:
        ngroups = stats['groups']
        nleaves = stats['leaves']
        nlinks = stats['links']
        nbytescopied = stats['bytes']
        nnodes = ngroups + nleaves + nlinks

        if tcopy:
            nodes_per_sec = round(nnodes / float(tcopy), 1)
            kb_per_sec = int(nbytescopied / (tcopy * 1024))
        else:
            # No measurable elapsed time: rates are undefined
            nodes_per_sec = kb_per_sec = 'NaN'

        print("Groups copied: %s  Leaves copied: %s  Links copied: %s" % (
            ngroups, nleaves, nlinks))
        if copyuserattrs:
            print("User attrs copied")
        else:
            print("User attrs not copied")
        print("KBytes copied: %s" % (round(nbytescopied / 1024., 3),))
        print("Time copying: %s s (real) %s s (cpu)  %s%%" %
              (tcopy, cpucopy, tpercent))
        print("Copied nodes/sec:  %s" % (nodes_per_sec,))
        print("Copied KB/s : %s" % (kb_per_sec,))
Example #12
0
def main():
    """Command-line entry point: copy (repack) nodes between HDF5 files.

    Parses ``sys.argv`` with getopt, builds the optional ``Filters``
    instance and the row range, then copies ``sourcefile:sourcegroup`` into
    ``destfile:destgroup`` using ``copy_children`` when the source node is a
    Group and ``copy_leaf`` otherwise.  With ``-v`` the settings and some
    timing statistics are printed.

    The module-level ``verbose``, ``regoldindexes`` and ``createsysattrs``
    flags are updated from the command line.

    Exits with status 0 after ``-h`` and with status 2 on a usage error.
    """
    global verbose
    global regoldindexes
    global createsysattrs

    usage = """usage: %s [-h] [-v] [-o] [-R start,stop,step] [--non-recursive] [--dest-title=title] [--dont-create-sysattrs] [--dont-copy-userattrs] [--overwrite-nodes] [--complevel=(0-9)] [--complib=lib] [--shuffle=(0|1)] [--fletcher32=(0|1)] [--keep-source-filters] [--chunkshape=value] [--upgrade-flavors] [--dont-regenerate-old-indexes] [--sortby=column] [--checkCSI] [--propindexes] sourcefile:sourcegroup destfile:destgroup
     -h -- Print usage message.
     -v -- Show more information.
     -o -- Overwrite destination file.
     -R RANGE -- Select a RANGE of rows (in the form "start,stop,step")
         during the copy of *all* the leaves.  Default values are
         "None,None,1", which means a copy of all the rows.
     --non-recursive -- Do not do a recursive copy. Default is to do it.
     --dest-title=title -- Title for the new file (if not specified,
         the source is copied).
     --dont-create-sysattrs -- Do not create sys attrs (default is to do it).
     --dont-copy-userattrs -- Do not copy the user attrs (default is to do it).
     --overwrite-nodes -- Overwrite destination nodes if they exist. Default is
         to not overwrite them.
     --complevel=(0-9) -- Set a compression level (0 for no compression, which
         is the default).
     --complib=lib -- Set the compression library to be used during the copy.
         lib can be set to "zlib", "lzo", "bzip2" or "blosc".  Defaults to
         "zlib".
     --shuffle=(0|1) -- Activate or not the shuffling filter (default is active
         if complevel>0).
     --fletcher32=(0|1) -- Whether to activate or not the fletcher32 filter
        (not active by default).
     --keep-source-filters -- Use the original filters in source files. The
         default is not doing that if any of --complevel, --complib, --shuffle
         or --fletcher32 option is specified.
     --chunkshape=("keep"|"auto"|int|tuple) -- Set a chunkshape.  A value
         of "auto" computes a sensible value for the chunkshape of the
         leaves copied.  The default is to "keep" the original value.
     --upgrade-flavors -- When repacking PyTables 1.x files, the flavor of
         leaves will be unset. With this, such a leaves will be serialized
         as objects with the internal flavor ('numpy' for 2.x series).
     --dont-regenerate-old-indexes -- Disable regenerating old indexes. The
         default is to regenerate old indexes as they are found.
     --sortby=column -- Do a table copy sorted by the index in "column".
         For reversing the order, use a negative value in the "step" part of
         "RANGE" (see "-R" flag).  Only applies to table objects.
     --checkCSI -- Force the check for a CSI index for the --sortby column.
     --propindexes -- Propagate the indexes existing in original tables.  The
         default is to not propagate them.  Only applies to table objects.
    \n""" % os.path.basename(sys.argv[0])

    def usage_error(message):
        # Report a command-line mistake and stop with a failing exit status.
        print(message)
        sys.stderr.write(usage)
        sys.exit(2)

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'hvoR:',
                                    ['non-recursive',
                                     'dest-title=',
                                     'dont-create-sysattrs',
                                     'dont-copy-userattrs',
                                     'overwrite-nodes',
                                     'complevel=',
                                     'complib=',
                                     'shuffle=',
                                     'fletcher32=',
                                     'keep-source-filters',
                                     'chunkshape=',
                                     'upgrade-flavors',
                                     'dont-regenerate-old-indexes',
                                     'sortby=',
                                     'checkCSI',
                                     'propindexes',
                                     ])
    except getopt.GetoptError as err:
        usage_error("Error parsing the options. The error was: %s" % (err,))

    # default options
    overwritefile = False
    keepfilters = False
    chunkshape = "keep"
    complevel = None
    complib = None
    shuffle = None
    fletcher32 = None
    title = ""
    copyuserattrs = True
    rng = None
    recursive = True
    overwrtnodes = False
    upgradeflavors = False
    sortby = None
    checkCSI = False
    propindexes = False

    # Get the options
    for name, value in opts:
        if name == '-h':
            # Asking for help is not an error: exit successfully
            sys.stderr.write(usage)
            sys.exit(0)
        elif name == '-v':
            verbose = True
        elif name == '-o':
            overwritefile = True
        elif name == '-R':
            try:
                # NOTE(review): eval() executes arbitrary code taken from the
                # command line; acceptable only because the caller is the
                # local user.  Consider parsing the three fields explicitly.
                rng = eval("slice(" + value + ")")
            except Exception as err:
                usage_error("Error when getting the range parameter.\n"
                            "  The error was: %s" % (err,))
        elif name == '--dest-title':
            title = value
        elif name == '--dont-create-sysattrs':
            createsysattrs = False
        elif name == '--dont-copy-userattrs':
            copyuserattrs = False
        elif name == '--non-recursive':
            recursive = False
        elif name == '--overwrite-nodes':
            overwrtnodes = True
        elif name == '--keep-source-filters':
            keepfilters = True
        elif name == '--chunkshape':
            chunkshape = value
            if chunkshape.isdigit() or chunkshape.startswith('('):
                # NOTE(review): eval() on a command-line string, as for -R
                chunkshape = eval(chunkshape)
        elif name == '--upgrade-flavors':
            upgradeflavors = True
        elif name == '--dont-regenerate-old-indexes':
            regoldindexes = False
        elif name == '--complevel':
            complevel = int(value)
        elif name == '--complib':
            complib = value
        elif name == '--shuffle':
            shuffle = int(value)
        elif name == '--fletcher32':
            fletcher32 = int(value)
        elif name == '--sortby':
            sortby = value
        elif name == '--propindexes':
            propindexes = True
        elif name == '--checkCSI':
            checkCSI = True
        else:
            usage_error("%s : Unrecognized option" % (name,))

    # if we pass a number of files different from 2, abort
    if len(pargs) != 2:
        usage_error("You need to pass both source and destination!.")

    # Catch the files passed as the last arguments.  Split on the LAST colon
    # only, so that a colon inside a file name does not break the unpacking.
    src = pargs[0].rsplit(':', 1)
    dst = pargs[1].rsplit(':', 1)
    if len(src) == 1:
        srcfile, srcnode = src[0], "/"
    else:
        srcfile, srcnode = src
    if len(dst) == 1:
        dstfile, dstnode = dst[0], "/"
    else:
        dstfile, dstnode = dst

    if srcnode == "":
        # case where filename == "filename:" instead of "filename:/"
        srcnode = "/"

    if dstnode == "":
        # case where filename == "filename:" instead of "filename:/"
        dstnode = "/"

    # Ignore the warnings for tables that contains oldindexes
    # (these will be handled by the copying routines)
    warnings.filterwarnings("ignore", category=OldIndexWarning)
    # Ignore the flavors warnings during upgrading flavor operations
    if upgradeflavors:
        warnings.filterwarnings("ignore", category=FlavorWarning)

    # Build the Filters instance
    if ((complevel, complib, shuffle, fletcher32) == (None,)*4 or keepfilters):
        filters = None
    else:
        if complevel is None:
            complevel = 0
        if shuffle is None:
            # Shuffling defaults to active only when compressing
            shuffle = complevel > 0
        if complib is None:
            complib = "zlib"
        if fletcher32 is None:
            fletcher32 = False
        filters = Filters(complevel=complevel, complib=complib,
                          shuffle=shuffle, fletcher32=fletcher32)

    # The start, stop and step params:
    start, stop, step = None, None, 1  # Defaults
    if rng is not None:
        start, stop, step = rng.start, rng.stop, rng.step

    # time.clock() does not exist on Python >= 3.8; use process_time() when
    # the interpreter provides it and fall back to clock() otherwise.
    cpuclock = getattr(time, 'process_time', None) or time.clock

    # Some timing
    t1 = time.time()
    cpu1 = cpuclock()
    # Copy the file
    if verbose:
        print("+=+"*20)
        print("Recursive copy: %s" % (recursive,))
        print("Applying filters: %s" % (filters,))
        if sortby is not None:
            print("Sorting table(s) by column: %s" % (sortby,))
            print("Forcing a CSI creation: %s" % (checkCSI,))
        if propindexes:
            print("Recreating indexes in copied table(s)")
        print("Start copying %s:%s to %s:%s" % (srcfile, srcnode,
                                                dstfile, dstnode))
        print("+=+"*20)

    # Check whether the specified source node is a group or a leaf
    h5srcfile = open_file(srcfile, 'r')
    try:
        srcnodeobject = h5srcfile.get_node(srcnode)
    finally:
        # Close the file again, even if the node lookup failed
        h5srcfile.close()

    stats = {'groups': 0, 'leaves': 0, 'links': 0, 'bytes': 0}
    if isinstance(srcnodeobject, Group):
        copy_children(
            srcfile, dstfile, srcnode, dstnode,
            title=title, recursive=recursive, filters=filters,
            copyuserattrs=copyuserattrs, overwritefile=overwritefile,
            overwrtnodes=overwrtnodes, stats=stats,
            start=start, stop=stop, step=step, chunkshape=chunkshape,
            sortby=sortby, checkCSI=checkCSI, propindexes=propindexes,
            upgradeflavors=upgradeflavors)
    else:
        # If not a Group, it should be a Leaf
        copy_leaf(
            srcfile, dstfile, srcnode, dstnode,
            title=title, filters=filters, copyuserattrs=copyuserattrs,
            overwritefile=overwritefile, overwrtnodes=overwrtnodes,
            stats=stats, start=start, stop=stop, step=step,
            chunkshape=chunkshape,
            sortby=sortby, checkCSI=checkCSI, propindexes=propindexes,
            upgradeflavors=upgradeflavors)

    # Gather some statistics
    t2 = time.time()
    cpu2 = cpuclock()
    tcopy = round(t2-t1, 3)
    cpucopy = round(cpu2-cpu1, 3)
    # A very fast copy can round to an elapsed time of zero
    try:
        tpercent = int(round(cpucopy/tcopy, 2)*100)
    except ZeroDivisionError:
        tpercent = 'NaN'

    if verbose:
        ngroups = stats['groups']
        nleaves = stats['leaves']
        nlinks = stats['links']
        nbytescopied = stats['bytes']
        nnodes = ngroups + nleaves + nlinks

        if tcopy:
            nodes_per_sec = round(nnodes / float(tcopy), 1)
            kb_per_sec = int(nbytescopied / (tcopy * 1024))
        else:
            # No measurable elapsed time: rates are undefined
            nodes_per_sec = kb_per_sec = 'NaN'

        print("Groups copied: %s  Leaves copied: %s  Links copied: %s" % (
            ngroups, nleaves, nlinks))
        if copyuserattrs:
            print("User attrs copied")
        else:
            print("User attrs not copied")
        print("KBytes copied: %s" % (round(nbytescopied/1024., 3),))
        print("Time copying: %s s (real) %s s (cpu)  %s%%" %
              (tcopy, cpucopy, tpercent))
        print("Copied nodes/sec:  %s" % (nodes_per_sec,))
        print("Copied KB/s : %s" % (kb_per_sec,))
Example #13
0
from tables.file import File, open_file
from tables import Filters
from tables import Atom

import numpy as np

# Parse the command-line options and check that the ones needed below are
# present, else print a help message and stop
parser = OptionParser()
parser.add_option("-i", "--input", dest="infile", help="read input h5 from here")
parser.add_option("-f", "--filters", dest="filters", help="read the filters from here")
parser.add_option("-o", "--filename", dest="filename", help="specify the .h5 filename that will contain all the filtered data")
(options, args) = parser.parse_args()
if not options.infile or not options.filename:
    # parser.error() prints the usage plus this message and exits
    parser.error("both --input and --filename are required")

# Open and prepare input and output hdf5 files
filename = options.filename
h5output = open_file(filename, mode = "w", title = "Filtered Data File")
zlib_filters = Filters(complib='zlib', complevel=5)

h5input = open_file(options.infile, mode = "r")

# Create a new group under "/" (root).  Uses create_group, the same
# snake_case API as open_file/create_group elsewhere in this script,
# instead of the legacy camelCase createGroup alias.
plates_group = h5output.create_group("/", 'plates', 'the plates for this replicate')

# walk_groups() yields the starting group first; drop it so that only the
# groups below "/plates" remain
all_plates = [p._v_name for p in h5input.walk_groups("/plates")]
all_plates = all_plates[1:]

# Create a group for each plate in the output file
for plate in all_plates:
    desc = "plate number " + plate
    h5output.create_group("/plates/",plate,desc)