def getview(self, view, pbar):
    """
    Return the data for ``view``, requesting the axes in ``iaxes`` whole.

    The request is broken into pieces with ``loop_mem()``.  Every piece is
    asserted to cover the complete extent of each axis listed in ``iaxes``
    (a name taken from the enclosing scope), is fetched through
    ``old_getview`` (also from the enclosing scope), and the portion that
    ``view`` actually asked for is copied into the output array.

    Parameters
    ----------
    view : View-like
      Describes the requested data; its ``axes``, ``slices`` and ``shape``
      attributes are used.
    pbar : progress bar
      ``pbar.part(i, n)`` is passed on to each ``old_getview`` call.

    Returns
    -------
    out : numpy.ndarray
      Array of shape ``view.shape`` and dtype ``self.dtype``.
    """
    from pygeode.view import View
    import numpy as np

    # Indices of the full axes
    fullaxis_ind = [self.whichaxis(a) for a in iaxes]
    # Put the other axes first, followed by the full axes
    ind = [i for i in range(self.naxes) if i not in fullaxis_ind] + fullaxis_ind
    # Inverse permutation: rind[original position] = position in the new order
    rind = [-1] * len(ind)
    for i, I in enumerate(ind):
        rind[I] = i
    assert len(ind) == self.naxes and len(set(ind)) == self.naxes

    # Construct a view with this new order of axes, and with the specified
    # axes unsliced.
    axes = tuple([view.axes[i] for i in ind])
    slices = tuple([view.slices[i] for i in ind])
    bigview = View(axes, slices=slices)
    # NOTE(review): fullaxis_ind holds positions in self's axis order, while
    # bigview uses the reordered axes -- confirm this is intended.
    bigview = bigview.unslice(*fullaxis_ind)
    viewloop = list(bigview.loop_mem())
    out = np.empty(view.shape, self.dtype)

    for i, smallview in enumerate(viewloop):
        for I in fullaxis_ind:
            assert smallview.shape[I] == bigview.shape[I], "can't get all of axis '%s' at once" % view.axes[I].name

        # Slicing relative to the original view
        outsl = tuple(smallview.map_to(bigview.clip()).slices)

        # Reorder the axes to the original order
        axes = tuple([smallview.axes[I] for I in rind])
        assert axes == self.axes
        slices = tuple([smallview.slices[I] for I in rind])
        smallview = View(axes, slices=slices)

        # Put outsl into the original axis order as well
        outsl = tuple([outsl[I] for I in rind])

        # Slicing the 'full' axes to get what we originally needed
        insl = [slice(None)] * self.naxes
        for I in fullaxis_ind:
            insl[I] = view.slices[I]

        # Get the data
        tmp = old_getview(self, smallview, pbar=pbar.part(i, len(viewloop)))

        # A multidimensional index must be a tuple; NumPy no longer accepts a
        # list of slices here.
        out[outsl] = tmp[tuple(insl)]

    return out
def getview(self, view, pbar):
    """
    Fetch the data for ``view`` without ever splitting the ``iaxes`` axes.

    ``iaxes`` and ``old_getview`` are names from the enclosing scope.  The
    axes of ``view`` are reordered so that the ``iaxes`` axes come last, the
    resulting view is unsliced along ``fullaxis_ind`` and looped over with
    ``loop_mem()``.  Each piece is put back into the original axis order,
    fetched with ``old_getview``, and then cut down to the slices that
    ``view`` requested before being stored in the output.

    Parameters
    ----------
    view : View-like
      The requested view (``axes``, ``slices`` and ``shape`` are read).
    pbar : progress bar
      Split with ``pbar.part(i, n)``, one part per piece fetched.

    Returns
    -------
    out : numpy.ndarray
      Array with shape ``view.shape`` and dtype ``self.dtype``.
    """
    from pygeode.view import View
    import numpy as np

    # Indices of the full axes
    fullaxis_ind = [self.whichaxis(a) for a in iaxes]
    # Prepend the other axes
    ind = [i for i in range(self.naxes) if i not in fullaxis_ind] + fullaxis_ind
    # Reverse order (inverse of the permutation above)
    rind = [-1] * len(ind)
    for i, I in enumerate(ind):
        rind[I] = i
    assert len(ind) == self.naxes and len(set(ind)) == self.naxes

    # Construct a view with this new order of axes, and with the specified
    # axes unsliced.
    axes = tuple([view.axes[i] for i in ind])
    slices = tuple([view.slices[i] for i in ind])
    bigview = View(axes, slices=slices)
    # NOTE(review): unslice() receives indices in self's axis order although
    # bigview has been reordered -- verify against View.unslice.
    bigview = bigview.unslice(*fullaxis_ind)
    viewloop = list(bigview.loop_mem())
    nparts = len(viewloop)
    out = np.empty(view.shape, self.dtype)

    for i, smallview in enumerate(viewloop):
        for I in fullaxis_ind:
            assert smallview.shape[I] == bigview.shape[I], "can't get all of axis '%s' at once" % view.axes[I].name

        # Slicing relative to the original view
        outsl = tuple(smallview.map_to(bigview.clip()).slices)

        # Reorder the axes to the original order
        axes = tuple([smallview.axes[I] for I in rind])
        assert axes == self.axes
        slices = tuple([smallview.slices[I] for I in rind])
        smallview = View(axes, slices=slices)

        # fudge outsl for this new order
        outsl = tuple([outsl[I] for I in rind])

        # Slicing the 'full' axes to get what we originally needed
        insl = [slice(None)] * self.naxes
        for I in fullaxis_ind:
            insl[I] = view.slices[I]

        # Get the data
        tmp = old_getview(self, smallview, pbar=pbar.part(i, nparts))

        # Index with a tuple: a list of slices is no longer accepted by
        # NumPy as a multidimensional index.
        out[outsl] = tmp[tuple(insl)]

    return out
def write_xdr(var, wfile):
    """
    Write the values of ``var`` to ``wfile`` in encoded binary form.

    A header holding ``var.size`` twice (two big-endian 32-bit integers) is
    written first.  The values then follow chunk by chunk, each chunk being
    converted to a contiguous array of the matching type and encoded with
    the corresponding ``lib.*toStr`` routine.

    Parameters
    ----------
    var : variable-like
      If it has a ``values`` attribute, that array is written in one go.
      Otherwise the data is read in pieces via ``View(var.axes).loop_mem()``
      and ``get_data_trap_io``.
    wfile : file-like
      Output object; only its ``write`` method is used.

    Raises
    ------
    TypeError
      If the DAP type looked up in ``np2dap`` has no encoder here.
    """
    import struct
    import numpy as np
    from pygeode.view import View

    # '!2l' = network byte order, two 4-byte signed integers.  (A bare '!2'
    # is not a valid struct format.)
    lenstr = struct.pack('!2l', var.size, var.size)
    wfile.write(lenstr)

    # Break the values into memory-friendly chunks
    if hasattr(var, 'values'):
        values_iter = [var.values]
    else:
        view = View(var.axes)
        viewloop = view.loop_mem()
        #TODO: make this more general - should we be futzing around with the axes at this level
        # Break it up even further along the first axis (presumably time), so
        # we don't start a long process through the whole dataset.
        if var.naxes > 2:
            new_viewloop = []
            for v in viewloop:
                for index in v.integer_indices[0]:
                    new_viewloop.append(v.modify_slice(0, [index]))
            viewloop = new_viewloop
        # Trap and handle any I/O errors
        values_iter = (get_data_trap_io(v, var) for v in viewloop)

    for values in values_iter:
        daptype = np2dap[values.dtype.name]
        if daptype in ('Byte', 'String'):
            values = np.ascontiguousarray(values, 'uint8')
            s = lib.int8toStr(values)
        elif daptype in ('UInt16', 'Int16', 'UInt32', 'Int32'):
            values = np.ascontiguousarray(values, 'int32')
            s = lib.int32toStr(values)
        elif daptype == 'Float32':
            values = np.ascontiguousarray(values, 'float32')
            s = lib.float32toStr(values)
        elif daptype == 'Float64':
            values = np.ascontiguousarray(values, 'float64')
            s = lib.float64toStr(values)
        else:
            # Without this, `s` would be unbound or still hold the previous
            # chunk's bytes.
            raise TypeError("unsupported DAP type '%s'" % daptype)
        wfile.write(s)
def write_xdr(var, wfile):
    """
    Encode the values of ``var`` and write them to ``wfile``.

    The output starts with ``var.size`` packed twice as big-endian 32-bit
    integers.  The data follows in chunks; each chunk is made contiguous in
    the type chosen from its DAP type (looked up in ``np2dap``) and encoded
    with the matching ``lib.*toStr`` routine.

    Parameters
    ----------
    var : variable-like
      Source of the data.  A ``values`` attribute, when present, is written
      directly; otherwise the data is loaded piecewise through
      ``View(var.axes).loop_mem()`` and ``get_data_trap_io``.
    wfile : file-like
      Destination; only ``write`` is called on it.

    Raises
    ------
    TypeError
      If a chunk's DAP type is not one of the types handled below.
    """
    import struct
    import numpy as np
    from pygeode.view import View

    # Header: element count, twice, in network byte order.
    wfile.write(struct.pack('!2l', var.size, var.size))

    # Break the values into memory-friendly chunks
    if hasattr(var, 'values'):
        values_iter = [var.values]
    else:
        viewloop = View(var.axes).loop_mem()
        #TODO: make this more general - should we be futzing around with the axes at this level
        # Break it up even further along the first axis (presumably time), so
        # we don't start a long process through the whole dataset.
        if var.naxes > 2:
            viewloop = [
                v.modify_slice(0, [index])
                for v in viewloop
                for index in v.integer_indices[0]
            ]
        # Trap and handle any I/O errors
        values_iter = (get_data_trap_io(v, var) for v in viewloop)

    for values in values_iter:
        daptype = np2dap[values.dtype.name]
        if daptype in ('Byte', 'String'):
            encoded = lib.int8toStr(np.ascontiguousarray(values, 'uint8'))
        elif daptype in ('UInt16', 'Int16', 'UInt32', 'Int32'):
            encoded = lib.int32toStr(np.ascontiguousarray(values, 'int32'))
        elif daptype == 'Float32':
            encoded = lib.float32toStr(np.ascontiguousarray(values, 'float32'))
        elif daptype == 'Float64':
            encoded = lib.float64toStr(np.ascontiguousarray(values, 'float64'))
        else:
            # Fail loudly rather than writing nothing, or re-writing the
            # previous chunk's bytes.
            raise TypeError("unsupported DAP type '%s'" % daptype)
        wfile.write(encoded)
def to_xarray(dataset):
    """
    Converts a PyGeode Dataset into an xarray Dataset.

    Axes and variables whose values are already in memory are wrapped
    directly.  All others become dask arrays, with one dask chunk for every
    piece produced by ``View.loop_mem()``.

    Parameters
    ----------
    dataset : pygeode.Dataset
      The dataset to be converted.

    Returns
    -------
    out : xarray.Dataset
      An object which can be used with the xarray package.
    """
    from pygeode.dataset import asdataset
    from pygeode.formats.cfmeta import encode_cf
    from pygeode.view import View
    from dask.base import tokenize
    import dask.array as da
    import xarray as xr

    # Normalize the input, then encode the axes/variables with CF metadata.
    dataset = encode_cf(asdataset(dataset))

    arrays = {}
    for var in list(dataset.axes) + list(dataset.vars):
        # A unique name to identify this variable within dask.
        name = var.name + "-" + tokenize(var)
        dims = [axis.name for axis in var.axes]

        # Values already in memory: no dask graph required.
        if hasattr(var, 'values'):
            arrays[var.name] = xr.DataArray(var.values, dims=dims, attrs=var.atts, name=var.name)
            continue

        # For each axis, the distinct index groups seen so far, in order of
        # first appearance.  loop_mem doesn't expose its chunking choices, so
        # they are reconstructed from the pieces it yields.
        seen = [[] for _ in var.axes]
        graph = {}
        for piece in View(var.axes).loop_mem():
            groups = list(map(tuple, piece.integer_indices))
            position = []
            for known, group in zip(seen, groups):
                if group not in known:
                    known.append(group)
                position.append(known.index(group))
            # Register this piece as one chunk of the dask array.
            graph[tuple([name] + position)] = (var.getview, piece, False)

        # Chunk sizes along each axis are the lengths of the index groups.
        chunks = [[len(group) for group in known] for known in seen]
        arr = da.Array(graph, name, chunks, dtype=var.dtype)

        # Wrap into an xarray.DataArray (with metadata and named axes).
        arrays[var.name] = xr.DataArray(arr, dims=dims, attrs=var.atts, name=var.name)

    # Build the final xarray.Dataset, then re-decode the CF metadata on the
    # xarray side.
    out = xr.Dataset(arrays, attrs=dataset.atts)
    return xr.conventions.decode_cf(out)
def to_xarray(dataset):
    """
    Converts a PyGeode Dataset into an xarray Dataset.

    Axes and variables that already hold their values in memory are wrapped
    as-is.  Everything else is exposed as a dask array whose chunks
    correspond to the pieces yielded by ``View.loop_mem()``.

    Parameters
    ----------
    dataset : pygeode.Dataset
      The dataset to be converted.

    Returns
    -------
    out : xarray.Dataset
      An object which can be used with the xarray package.
    """
    from pygeode.dataset import asdataset
    from pygeode.formats.cfmeta import encode_cf
    from pygeode.view import View
    from dask.base import tokenize
    import dask.array as da
    import xarray as xr

    dataset = asdataset(dataset)
    # Encode the axes/variables with CF metadata.
    dataset = encode_cf(dataset)
    out = dict()
    # Loop over each axis and variable.
    for var in list(dataset.axes) + list(dataset.vars):
        # Generate a unique name to identify it with dask.
        name = var.name + "-" + tokenize(var)
        dsk = dict()
        dims = [a.name for a in var.axes]

        # Special case: already have the values in memory.
        if hasattr(var, 'values'):
            out[var.name] = xr.DataArray(var.values, dims=dims, attrs=var.atts, name=var.name)
            continue

        # Keep track of all the slices that were made over each dimension.
        # This information will be used to determine the "chunking" that was
        # done on the variable from inview.loop_mem().
        slice_order = [[] for a in var.axes]

        # Break up the variable into portions that are small enough to fit
        # in memory.  These will become the "chunks" for dask.
        inview = View(var.axes)
        for outview in inview.loop_mem():
            # Must be a real list: it is iterated twice below, and a bare
            # map() would be exhausted after the first pass on Python 3.
            integer_indices = list(map(tuple, outview.integer_indices))
            # Determine *how* loop_mem is splitting the axes, and define the
            # chunk sizes accordingly.
            # A little indirect, but loop_mem doesn't make its chunking
            # choices available to the caller.
            for o, sl in zip(slice_order, integer_indices):
                if sl not in o:
                    o.append(sl)
            ind = [o.index(sl) for o, sl in zip(slice_order, integer_indices)]
            # Add this chunk to the dask array.
            key = tuple([name] + ind)
            dsk[key] = (var.getview, outview, False)

        # Construct the dask array.  The chunk sizes must be concrete lists
        # of ints, not lazy map objects.
        chunks = [[len(indices) for indices in sl] for sl in slice_order]
        arr = da.Array(dsk, name, chunks, dtype=var.dtype)

        # Wrap this into an xarray.DataArray (with metadata and named axes).
        out[var.name] = xr.DataArray(arr, dims=dims, attrs=var.atts, name=var.name)

    # Build the final xarray.Dataset.
    out = xr.Dataset(out, attrs=dataset.atts)

    # Re-decode the CF metadata on the xarray side.
    out = xr.conventions.decode_cf(out)

    return out