def get_keys_stops_divisions(path, key, stop, sorted_index, chunksize, mode="r"):
    """
    Get the "keys" (group identifiers) in the HDF file at ``path`` that
    match ``key``, which may contain glob-style wildcards.

    For each matched key also compute:

    * the stop row: ``stop`` if given, otherwise the storer's row count, and
    * the divisions: index values sampled every ``chunksize`` rows plus the
      final index value when ``sorted_index`` is True, otherwise ``None``.

    Parameters
    ----------
    path : str
        Path to the HDF file.
    key : str
        Group identifier, possibly containing glob wildcards.
    stop : int or None
        Row at which to stop reading; must not exceed the dataset length.
    sorted_index : bool
        Whether the on-disk index is sorted, enabling division computation.
    chunksize : int
        Number of rows per partition, used to sample division boundaries.
    mode : str, optional
        Mode with which to open the HDF store (default ``"r"``, read-only).

    Returns
    -------
    tuple
        ``(keys, stops, divisions)`` — three lists of equal length.

    Raises
    ------
    TypeError
        If a matched key is stored in "fixed" (non-table) format.
    ValueError
        If ``stop`` exceeds a matched dataset's number of rows.
    """
    import glob
    from distutils.version import LooseVersion

    with pd.HDFStore(path, mode=mode) as hdf:
        if LooseVersion(pd.__version__) >= LooseVersion("0.24"):
            if not glob.has_magic(key):
                keys = [key]
            else:
                keys = [k for k in hdf.keys() if fnmatch(k, key)]
                # pandas misses some nodes when listing keys:
                # https://github.com/dask/dask/issues/5934
                # TODO: remove this part if/when pandas copes with all keys
                keys.extend(
                    n._v_pathname
                    for n in hdf._handle.walk_nodes("/", classname="Table")
                    if fnmatch(n._v_pathname, key)
                    and n._v_name != u"table"
                    and n._v_pathname not in keys
                )
        else:
            # TODO: remove if we require pandas >= 0.24
            keys = [k for k in hdf.keys() if fnmatch(k, key)]

        stops = []
        divisions = []
        for k in keys:
            storer = hdf.get_storer(k)
            # "fixed"-format stores cannot be read in row ranges.
            if storer.format_type != "table":
                raise TypeError(dont_use_fixed_error_message)
            if stop is None:
                stops.append(storer.nrows)
            elif stop > storer.nrows:
                raise ValueError("Stop keyword exceeds dataset number "
                                 "of rows ({})".format(storer.nrows))
            else:
                stops.append(stop)
            if sorted_index:
                # First index value of each chunk of ``chunksize`` rows ...
                division = [
                    storer.read_column("index",
                                       start=start,
                                       stop=start + 1)[0]
                    for start in range(0, storer.nrows, chunksize)
                ]
                # ... plus the very last index value closes the divisions.
                division_end = storer.read_column("index",
                                                  start=storer.nrows - 1,
                                                  stop=storer.nrows)[0]
                division.append(division_end)
                divisions.append(division)
            else:
                divisions.append(None)

    return keys, stops, divisions
# Beispiel #2 (pasted-example separator left over from extraction)
# 0
def _expand_key(key, hdf):
    import glob

    if not glob.has_magic(key):
        keys = [key]
    else:
        keys = [k for k in hdf.keys() if fnmatch(k, key)]
        # https://github.com/dask/dask/issues/5934
        # TODO: remove this part if/when pandas copes with all keys
        keys.extend(n._v_pathname
                    for n in hdf._handle.walk_nodes("/", classname="Table")
                    if fnmatch(n._v_pathname, key) and n._v_name != "table"
                    and n._v_pathname not in keys)
    return keys