Beispiel #1
0
        def getview(self, view, pbar):
            """Fetch the data for ``view``, requesting certain axes in full.

            The axes named by ``iaxes`` are un-sliced, the enlarged request is
            split into pieces by ``loop_mem``, each piece is fetched through
            ``old_getview``, and a selection of each piece is copied into the
            output array.

            ``iaxes`` and ``old_getview`` are not defined in this function; they
            are presumably provided by an enclosing scope (verify against the
            surrounding code).

            Parameters
            ----------
            view : View-like
              The requested region; its ``axes``, ``slices`` and ``shape`` are read.
            pbar : progress reporter
              ``pbar.part(i, n)`` is handed to the i-th ``old_getview`` call.

            Returns
            -------
            out : numpy.ndarray
              Array of shape ``view.shape`` with dtype ``self.dtype``.
            """
            from pygeode.view import View
            import numpy as np

            # Positions (in this variable's axis order) of the axes wanted whole.
            fullaxis_ind = [self.whichaxis(a) for a in iaxes]
            # Permutation that puts every other axis first and the full axes last.
            ind = [i for i in range(self.naxes) if i not in fullaxis_ind
                   ] + fullaxis_ind
            # Inverse permutation: rind[original position] = permuted position.
            rind = [-1] * len(ind)
            for i, I in enumerate(ind):
                rind[I] = i
            assert len(ind) == self.naxes and len(set(ind)) == self.naxes

            # Build a view in the permuted axis order, then unslice the full axes.
            axes = tuple([view.axes[i] for i in ind])
            slices = tuple([view.slices[i] for i in ind])
            bigview = View(axes, slices=slices)
            # NOTE(review): fullaxis_ind holds positions in the ORIGINAL axis
            # order, while bigview's axes are in `ind` order.  The two agree only
            # when the full axes are already the trailing axes - confirm that
            # this (and the shape assertion in the loop below) is intended.
            bigview = bigview.unslice(*fullaxis_ind)
            viewloop = list(bigview.loop_mem())
            nparts = len(viewloop)
            out = np.empty(view.shape, self.dtype)

            # Selection applied to every fetched piece: everything along the
            # looped axes, and the caller's slice along the full axes.  It does
            # not depend on the piece, so it is built once.  It must be a tuple:
            # NumPy treats only a tuple as "one index per axis"; a list of slices
            # is no longer accepted as a multidimensional index.
            insl = [slice(None)] * self.naxes
            for I in fullaxis_ind:
                insl[I] = view.slices[I]
            insl = tuple(insl)

            for i, smallview in enumerate(viewloop):
                for I in fullaxis_ind:
                    assert smallview.shape[I] == bigview.shape[I], \
                        "can't get all of axis '%s' at once" % view.axes[I].name

                # Location of this piece relative to the clipped big view
                # (still in the permuted axis order).
                outsl = tuple(smallview.map_to(bigview.clip()).slices)

                # Put the axes back into the original order before fetching.
                axes = tuple([smallview.axes[I] for I in rind])
                assert axes == self.axes
                slices = tuple([smallview.slices[I] for I in rind])
                smallview = View(axes, slices=slices)

                # Apply the same reordering to the output location.
                outsl = tuple([outsl[I] for I in rind])

                # Fetch the piece through the wrapped implementation.
                tmp = old_getview(self, smallview, pbar=pbar.part(i, nparts))

                out[outsl] = tmp[insl]

            return out
Beispiel #2
0
    def getview (self, view, pbar):
      """Fetch the data for ``view``, requesting certain axes in full.

      The axes named by ``iaxes`` are un-sliced, the enlarged request is
      split into pieces by ``loop_mem``, each piece is fetched through
      ``old_getview``, and a selection of each piece is copied into the
      output array.

      ``iaxes`` and ``old_getview`` are not defined in this function; they
      are presumably provided by an enclosing scope (verify against the
      surrounding code).

      Parameters
      ----------
      view : View-like
        The requested region; its ``axes``, ``slices`` and ``shape`` are read.
      pbar : progress reporter
        ``pbar.part(i, n)`` is handed to the i-th ``old_getview`` call.

      Returns
      -------
      out : numpy.ndarray
        Array of shape ``view.shape`` with dtype ``self.dtype``.
      """
      from pygeode.view import View
      import numpy as np

      # Positions (in this variable's axis order) of the axes wanted whole.
      fullaxis_ind = [self.whichaxis(a) for a in iaxes]
      # Permutation that puts every other axis first and the full axes last.
      ind = [i for i in range(self.naxes) if i not in fullaxis_ind] + fullaxis_ind
      # Inverse permutation: rind[original position] = permuted position.
      rind = [-1] * len(ind)
      for i,I in enumerate(ind):
        rind[I] = i
      assert len(ind) == self.naxes and len(set(ind)) == self.naxes

      # Build a view in the permuted axis order, then unslice the full axes.
      axes = tuple([view.axes[i] for i in ind])
      slices = tuple([view.slices[i] for i in ind])
      bigview = View(axes, slices = slices)
      # NOTE(review): fullaxis_ind holds positions in the ORIGINAL axis order,
      # while bigview's axes are in `ind` order.  The two agree only when the
      # full axes are already the trailing axes - confirm that this (and the
      # shape assertion in the loop below) is intended.
      bigview = bigview.unslice(*fullaxis_ind)
      viewloop = list(bigview.loop_mem())
      nparts = len(viewloop)
      out = np.empty(view.shape, self.dtype)

      # Selection applied to every fetched piece: everything along the looped
      # axes, and the caller's slice along the full axes.  It does not depend
      # on the piece, so it is built once.  It must be a tuple: NumPy treats
      # only a tuple as "one index per axis"; a list of slices is no longer
      # accepted as a multidimensional index.
      insl = [slice(None)] * self.naxes
      for I in fullaxis_ind: insl[I] = view.slices[I]
      insl = tuple(insl)

      for i,smallview in enumerate(viewloop):
        for I in fullaxis_ind:
          assert smallview.shape[I] == bigview.shape[I], "can't get all of axis '%s' at once"%view.axes[I].name

        # Location of this piece relative to the clipped big view
        # (still in the permuted axis order).
        outsl = tuple(smallview.map_to(bigview.clip()).slices)

        # Put the axes back into the original order before fetching.
        axes = tuple([smallview.axes[I] for I in rind])
        assert axes == self.axes
        slices = tuple([smallview.slices[I] for I in rind])
        smallview = View (axes, slices = slices)

        # Apply the same reordering to the output location.
        outsl = tuple([outsl[I] for I in rind])

        # Fetch the piece through the wrapped implementation.
        tmp = old_getview (self, smallview, pbar = pbar.part(i,nparts) )

        out[outsl] = tmp[insl]

      return out
Beispiel #3
0
def write_xdr(var, wfile):
    """Write the data of ``var`` to ``wfile``: a length header, then the values.

    The header is ``var.size`` packed twice.  The values are then written
    chunk by chunk, each chunk converted to a contiguous array of the type
    chosen from its DAP type and encoded by the matching ``lib.*toStr``
    routine.

    ``np2dap``, ``lib`` and ``get_data_trap_io`` are module-level names not
    visible in this function; their exact behaviour should be checked there.

    Parameters
    ----------
    var : pygeode Var-like
      Source of the data.  If it has a ``values`` attribute that array is
      written in one go; otherwise the data are loaded in pieces.
    wfile : file-like
      Destination; only its ``write`` method is used.

    Raises
    ------
    ValueError
      If the DAP type looked up for the data has no encoder in this function.
    """
    import struct
    import numpy as np
    from pygeode.view import View

    # Header: the element count, twice, as big-endian 32-bit signed integers.
    # ('!2l' = network byte order, two longs; a bare repeat count such as '!2'
    # is rejected by struct.)
    lenstr = struct.pack('!2l', var.size, var.size)
    wfile.write(lenstr)

    # Break the values into memory-friendly chunks
    if hasattr(var, 'values'):
        values_iter = [var.values]
    else:
        view = View(var.axes)
        viewloop = view.loop_mem()
        #TODO: make this more general - should we be futzing around with the axes at this level
        # Break it up even further along the first axis, one index at a time
        # (so we don't start a long process through the whole dataset).
        if var.naxes > 2:
            new_viewloop = []
            for v in viewloop:
                for s in v.integer_indices[0]:
                    new_viewloop.append(v.modify_slice(0, [s]))
            viewloop = new_viewloop

        # Lazily load each chunk; get_data_trap_io is expected to trap and
        # handle any I/O errors (see its definition).
        values_iter = (get_data_trap_io(v, var) for v in viewloop)

    for values in values_iter:

        daptype = np2dap[values.dtype.name]
        if daptype in ('Byte', 'String'):
            values = np.ascontiguousarray(values, 'uint8')
            s = lib.int8toStr(values)
        elif daptype in ('UInt16', 'Int16', 'UInt32', 'Int32'):
            # All integer types are widened/cast to 32 bits before encoding.
            values = np.ascontiguousarray(values, 'int32')
            s = lib.int32toStr(values)
        elif daptype == 'Float32':
            values = np.ascontiguousarray(values, 'float32')
            s = lib.float32toStr(values)
        elif daptype == 'Float64':
            values = np.ascontiguousarray(values, 'float64')
            s = lib.float64toStr(values)
        else:
            # Previously this fell through with `s` unbound.
            raise ValueError("don't know how to encode DAP type '%s'" % daptype)

        wfile.write(s)
Beispiel #4
0
def write_xdr(var, wfile):
  """Write the data of ``var`` to ``wfile``: a length header, then the values.

  The header is ``var.size`` packed twice.  The values are then written
  chunk by chunk, each chunk converted to a contiguous array of the type
  chosen from its DAP type and encoded by the matching ``lib.*toStr``
  routine.

  ``np2dap``, ``lib`` and ``get_data_trap_io`` are module-level names not
  visible in this function; their exact behaviour should be checked there.

  Parameters
  ----------
  var : pygeode Var-like
    Source of the data.  If it has a ``values`` attribute that array is
    written in one go; otherwise the data are loaded in pieces.
  wfile : file-like
    Destination; only its ``write`` method is used.

  Raises
  ------
  ValueError
    If the DAP type looked up for the data has no encoder in this function.
  """
  import struct
  import numpy as np
  from pygeode.view import View

  # Header: the element count, twice, as big-endian 32-bit signed integers.
  lenstr = struct.pack('!2l', var.size, var.size)
  wfile.write(lenstr)

  # Break the values into memory-friendly chunks
  if hasattr (var, 'values'):
    values_iter = [var.values]
  else:
    view = View(var.axes)
    viewloop = view.loop_mem()
    #TODO: make this more general - should we be futzing around with the axes at this level
    # Break it up even further along the first axis, one index at a time
    # (so we don't start a long process through the whole dataset).
    if var.naxes > 2:
      new_viewloop = []
      for v in viewloop:
        for s in v.integer_indices[0]:
          new_viewloop.append(v.modify_slice(0,[s]))
      viewloop = new_viewloop

    # Lazily load each chunk; get_data_trap_io is expected to trap and
    # handle any I/O errors (see its definition).
    values_iter = (get_data_trap_io(v,var) for v in viewloop)

  for values in values_iter:

    daptype = np2dap[values.dtype.name]
    if daptype in ('Byte','String'):
      values = np.ascontiguousarray(values, 'uint8')
      s = lib.int8toStr(values)
    elif daptype in ('UInt16', 'Int16', 'UInt32', 'Int32'):
      # All integer types are widened/cast to 32 bits before encoding.
      values = np.ascontiguousarray(values, 'int32')
      s = lib.int32toStr(values)
    elif daptype == 'Float32':
      values = np.ascontiguousarray(values, 'float32')
      s = lib.float32toStr(values)
    elif daptype == 'Float64':
      values = np.ascontiguousarray(values, 'float64')
      s = lib.float64toStr(values)
    else:
      # Previously this fell through with `s` unbound.
      raise ValueError("don't know how to encode DAP type '%s'" % daptype)

    wfile.write(s)
Beispiel #5
0
def to_xarray(dataset):
    """
    Build an xarray Dataset from a PyGeode Dataset.

    Axes and variables whose data are already in memory (they expose a
    ``values`` attribute) are wrapped directly.  Everything else becomes a
    dask array with one task per piece produced by ``View.loop_mem()``, so
    the data are only loaded when the dask array is computed.

    Parameters
    ----------
    dataset : pygeode.Dataset
      The dataset to be converted.

    Returns
    -------
    out : xarray.Dataset
      An object which can be used with the xarray package.
    """
    from pygeode.dataset import asdataset
    from pygeode.formats.cfmeta import encode_cf
    from pygeode.view import View
    from dask.base import tokenize
    import dask.array as da
    import xarray as xr

    # Normalise the input, then encode the axes/variables with CF metadata.
    dataset = encode_cf(asdataset(dataset))

    arrays = {}
    # Axes first, then variables.
    members = list(dataset.axes)
    members.extend(dataset.vars)
    for var in members:
        # Unique name under which dask will know this array.
        name = var.name + "-" + tokenize(var)
        dims = [axis.name for axis in var.axes]

        # Values already in memory: no dask graph needed.
        if hasattr(var, 'values'):
            arrays[var.name] = xr.DataArray(
                var.values, dims=dims, attrs=var.atts, name=var.name)
            continue

        # loop_mem() does not report how it split the axes, so recover it:
        # for each axis, remember the distinct index tuples in the order they
        # are first seen.  A piece's position in that list is its dask block
        # index along the axis, and the tuple's length is the block size.
        seen_per_axis = [[] for _ in var.axes]
        graph = {}
        for piece in View(var.axes).loop_mem():
            block = []
            for seen, indices in zip(seen_per_axis, piece.integer_indices):
                indices = tuple(indices)
                if indices not in seen:
                    seen.append(indices)
                block.append(seen.index(indices))
            # Task: var.getview(piece, False)
            graph[(name,) + tuple(block)] = (var.getview, piece, False)

        # Block sizes along each axis, then the dask array itself.
        chunks = [[len(indices) for indices in seen] for seen in seen_per_axis]
        lazy = da.Array(graph, name, chunks, dtype=var.dtype)
        # Attach metadata and named dimensions.
        arrays[var.name] = xr.DataArray(
            lazy, dims=dims, attrs=var.atts, name=var.name)

    # Assemble the Dataset and let xarray decode the CF metadata again.
    return xr.conventions.decode_cf(xr.Dataset(arrays, attrs=dataset.atts))
Beispiel #6
0
def to_xarray(dataset):
  """
  Converts a PyGeode Dataset into an xarray Dataset.

  Axes and variables whose data are already in memory (they expose a
  ``values`` attribute) are wrapped directly.  Everything else becomes a
  dask array with one task per piece produced by ``View.loop_mem()``.

  Parameters
  ----------
  dataset : pygeode.Dataset
    The dataset to be converted.

  Returns
  -------
  out : xarray.Dataset
    An object which can be used with the xarray package.
  """
  from pygeode.dataset import asdataset
  from pygeode.formats.cfmeta import encode_cf
  from pygeode.view import View
  from dask.base import tokenize
  import dask.array as da
  import xarray as xr
  dataset = asdataset(dataset)
  # Encode the axes/variables with CF metadata.
  dataset = encode_cf(dataset)
  out = dict()
  # Loop over each axis and variable.
  for var in list(dataset.axes) + list(dataset.vars):
    # Generate a unique name to identify it with dask.
    name = var.name + "-" + tokenize(var)
    dsk = dict()
    dims = [a.name for a in var.axes]

    # Special case: already have the values in memory.
    if hasattr(var,'values'):
      out[var.name] = xr.DataArray(var.values, dims=dims, attrs=var.atts, name=var.name)
      continue

    # Keep track of all the slices that were made over each dimension.
    # This information will be used to determine the "chunking" that was done
    # on the variable from inview.loop_mem().
    slice_order = [[] for a in var.axes]
    # Break up the variable into portions that are small enough to fit
    # in memory.  These will become the "chunks" for dask.
    inview = View(var.axes)
    for outview in inview.loop_mem():
      # Materialise as a list: this sequence is walked twice below, and on
      # Python 3 a bare map() would be exhausted after the first pass,
      # leaving every chunk with the same (empty) block index.
      integer_indices = list(map(tuple,outview.integer_indices))
      # Determine *how* loop_mem is splitting the axes, and define the chunk
      # sizes accordingly.
      # A little indirect, but loop_mem doesn't make its chunking choices
      # available to the caller.
      for o, sl in zip(slice_order, integer_indices):
        if sl not in o:
          o.append(sl)
      ind = [o.index(sl) for o, sl in zip(slice_order, integer_indices)]
      # Add this chunk to the dask array.  The task calls
      # var.getview(outview, False).
      key = tuple([name] + ind)
      dsk[key] = (var.getview, outview, False)
    # Construct the dask array.  The chunk sizes must be real lists of
    # ints, not lazy map objects.
    chunks = [list(map(len,sl)) for sl in slice_order]
    arr = da.Array(dsk, name, chunks, dtype=var.dtype)
    # Wrap this into an xarray.DataArray (with metadata and named axes).
    out[var.name] = xr.DataArray(arr, dims = dims, attrs = var.atts, name=var.name)
  # Build the final xarray.Dataset.
  out = xr.Dataset(out, attrs=dataset.atts)
  # Re-decode the CF metadata on the xarray side.
  out = xr.conventions.decode_cf(out)
  return out