예제 #1
0
    def __enter__(self):
        """Open the master .nca file and build one S3NetCDFGroup per group.

        Returns self so the object can be used as a context manager.
        """
        self.nca = NetCDF(self.ncaPath, self.mode)
        # One wrapper object per group found in the master file.
        self.groups = {
            name: S3NetCDFGroup(self, name)
            for name in self.nca.groups
        }
        return self
예제 #2
0
    def __run(self, vname, indices, _data=None, _value=None):
        """Read from or write to the partition files covering `indices`.

        Exactly one of `_data` / `_value` is expected to be supplied by the
        caller: `_data` is a preallocated output array that gets filled (GET),
        `_value` is the data to store (SET). Returns `_data` (possibly None
        in the SET case).
        """
        shape = self.shape
        masterShape = self.master

        # Which partition files the requested indices touch, and the
        # corresponding flat indices into the master layout.
        partitions = getPartitions(indices, shape, masterShape)
        masterIndices = getMasterIndices(indices, shape, masterShape)

        for part in partitions:
            # idata: positions in the flattened output; ipart: positions
            # inside this partition file — NOTE(review): inferred from usage
            # below, confirm against getSubIndex.
            idata, ipart = getSubIndex(part, shape, masterIndices)
            # GET
            if _data is not None:
                d = _data.flatten()
                filepath = self._getFile(vname, part)
                with NetCDF(filepath, "r") as netcdf:
                    d = getItemNetCDF(netcdf[vname], d, ipart, idata)
                # Downloaded copies can be discarded once read.
                if not self.parent.localOnly and self.parent.autoRemove:
                    os.remove(filepath)
                # Fold this partition's values back into the output array;
                # _data accumulates across loop iterations.
                _data = d.reshape(_data.shape)

            # SET
            if _value is not None:
                filepath = self._setFile(vname, part)
                with NetCDF(filepath, "r+") as netcdf:
                    setItemNetCDF(netcdf[vname], _value, ipart, idata)
                # Push the modified partition to S3, then optionally drop
                # the local copy.
                if not self.parent.localOnly:
                    self.parent.s3.upload(filepath)
                    if self.parent.autoRemove: os.remove(filepath)
        return _data
예제 #3
0
 def _setFile(self, vname, part):
     """Return the local path of a writable partition file.

     If the file is not cached locally it is either created from the group's
     layout (local-only mode, or not yet on S3) or downloaded from S3.
     """
     filepath = self._getFilepath(vname, part)
     if os.path.exists(filepath):
         return filepath
     if self.parent.localOnly or not self.parent.s3.exists(filepath):
         # Fresh partition: create an empty child NetCDF with this
         # group's dimensions and variable definitions.
         NetCDF.create(filepath,
                       dimensions=self.childDimensions,
                       variables=copy.deepcopy(self.variables))
     else:
         self.parent.s3.download(filepath)
     return filepath
예제 #4
0
 def _quickSet(self, vname, partIndex, value):
     """Overwrite one entire partition of `vname` with `value`.

     Uploads the file to S3 afterwards unless running local-only.
     """
     indexVector = np.zeros(self.ndata, dtype="int")
     indexVector[0] = partIndex
     path = self._setFile(vname, indexVector)
     with NetCDF(path, "r+") as nc:
         nc[vname][:] = value
     if not self.parent.localOnly:
         self.parent.s3.upload(path)
예제 #5
0
 def _quickGet(self, vname, i):
     """Read the single value of `vname` at flat index `i`.

     Maps `i` onto (partition, offset) using the child partition size along
     the first axis, then reads that offset from the partition file.
     """
     # Fix: removed leftover debug statement `print("here", vname, i)`.
     parts = np.zeros(self.ndata, dtype="int")
     parts[0] = int(np.floor(i / self.child[0]))
     index = int(i % self.child[0])
     filepath = self._getFile(vname, parts)
     with NetCDF(filepath, "r") as netcdf:
         var = netcdf[vname][index]
     return var
예제 #6
0
class S3NetCDF(object):
    """
    A NetCDF container that keeps data in partitioned local cache files and,
    optionally, mirrors them in an S3 bucket.

    Parameters
    ----------
    obj : dict
        Configuration object. Recognized keys:
          name : str
            Name of the dataset (required).
          bucket : str
            Name of the S3 bucket (required unless localOnly).
          localOnly : bool, default True
            When True, S3 storage is ignored entirely.
          squeeze : bool, default False
            Squeeze returned arrays - mainly used for testing.
          cacheLocation : str, default os.getcwd()
            Root folder of the local cache.
          maxPartitions : int, default 10
            Maximum number of partitions a single query may touch.
          ncSize : float, default 10
            Maximum partition size (MB).
          nca : dict
            Master-file definition (dimensions, groups, variables); required
            only when a new .nca file must be created.
    mode : str, default "r"
        Mode used to open the master .nca file in __enter__.

    Attributes
    ----------
    ncaPath : str
        Master file; contains information about master and child netcdf files.
    groups : dict of S3NetCDFGroup
        One entry per group or folder (e.g. "s", "t"); each group contains
        different variables but with the same dimensions. Populated in
        __enter__.
    """

    def __init__(self, obj, mode="r"):
        # Work on a copy so the caller's configuration dict is never mutated.
        obj = copy.deepcopy(obj)

        self.mode = mode
        self.name = name = obj.pop("name", None)
        self.bucket = bucket = obj.pop("bucket", None)
        self.dynamodb = dynamodb = obj.pop("dynamodb", None)
        self.s3prefix = obj.pop("s3prefix", None)
        self.localOnly = localOnly = obj.pop("localOnly", True)
        self.squeeze = obj.pop("squeeze", False)
        # Fix: the default must be the result of os.getcwd(), not the function
        # object itself — previously `os.getcwd` was passed uncalled, which
        # broke os.path.join() whenever cacheLocation was omitted.
        self.cacheLocation = obj.pop("cacheLocation", os.getcwd())
        self.maxPartitions = obj.pop("maxPartitions", 10)
        self.ncSize = ncSize = obj.pop("ncSize", 10)
        self.memorySize = obj.pop("memorySize", 20)
        self.cacheSize = obj.pop("cacheSize", 10) * 1024**2  # MB -> bytes
        self.verbose = verbose = obj.pop("verbose", False)
        self.overwrite = overwrite = obj.pop("overwrite", False)
        self.autoRemove = obj.pop("autoRemove", True)

        # Fix: validate before building paths/clients so a missing name raises
        # the intended Exception instead of a TypeError from os.path.join.
        if name is None: raise Exception("NetCDF needs a name")
        if not localOnly and bucket is None:
            raise Exception("Need a S3 bucket")

        self.s3 = s3 = S3Client(self, obj.pop("credentials", {}))
        self.folder = folder = os.path.join(self.cacheLocation, name)
        self.cache = Cache(self)
        self.groups = None
        self.nca = None

        if not os.path.exists(folder): os.makedirs(folder, exist_ok=True)

        self.ncaPath = ncaPath = os.path.join(folder, "{}.nca".format(name))

        if not os.path.exists(ncaPath) or overwrite:
            # `overwrite` checked before s3.exists() to skip a needless
            # remote call when we are going to recreate the file anyway.
            if localOnly or overwrite or not s3.exists(ncaPath):
                if verbose:
                    # Fix: the "ncaPath=" slot previously printed
                    # s3.exists(ncaPath) instead of the path itself.
                    print(
                        "Creating a new .nca from object (localOnly={},ncaPath={},overwrite={})"
                        .format(localOnly, ncaPath, overwrite))
                if not "nca" in obj:
                    raise Exception("NetCDF needs a nca object")
                createNetCDF(ncaPath, ncSize=ncSize, **obj["nca"])
                if not localOnly: s3.upload(ncaPath)
                if not localOnly and dynamodb: s3.insert()
            elif s3.exists(ncaPath):
                if verbose:
                    print("Downloading .nca from S3 - {}".format(ncaPath))
                s3.download(ncaPath)
            else:
                raise Exception("Unknown error")

    def __enter__(self):
        """Open the master file and build the group lookup table."""
        self.nca = NetCDF(self.ncaPath, self.mode)
        self.groups = {}

        for groupname in self.nca.groups:
            self.groups[groupname] = S3NetCDFGroup(self, groupname)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.nca.close()

    def updateMetadata(self, obj):
        """Forward a metadata update to the master file."""
        self.nca.updateMetadata(obj)

    @property
    def obj(self):
        return self.nca.obj

    @property
    def dimensions(self):
        return self.nca.obj['dimensions']

    @property
    def variables(self):
        return self.nca.allvariables

    def getGroupsByVariable(self, vname):
        """Return the list of group names that contain variable `vname`."""
        groupsByVariable = self.nca.obj['groupsByVariable']
        if not vname in groupsByVariable:
            raise Exception("Variable {} does not exist".format(vname))
        return groupsByVariable[vname]

    def getVariablesByDimension(self, dname):
        """Return the list of variable names defined on dimension `dname`."""
        variablesByDimension = self.nca.obj['variablesByDimension']
        if not dname in variablesByDimension:
            raise Exception("Dimension {} does not exist".format(dname))
        return variablesByDimension[dname]

    def setlocalOnly(self, value):
        self.localOnly = value

    def _item_(self, idx):
        """Split an index tuple into (group object, remaining indices)."""
        if not isinstance(idx, tuple) or len(idx) < 2:
            raise TypeError(
                "groupname and variablename are required, e.g netcdf2d['{groupname}','{variablename}']"
            )

        idx = list(idx)
        groupname = idx.pop(0)
        idx = tuple(idx)

        groups = self.groups
        if not groupname in groups:
            raise Exception("Group '{}' does not exist".format(groupname))
        group = groups[groupname]
        return group, idx

    def __getitem__(self, idx):
        """
      netcdf2d["{groupname}","{variablename}",{...indices...}]
    """
        # A bare string returns the group object itself.
        if isinstance(idx, str): return self.groups[idx]
        group, idx = self._item_(idx)
        data = group[idx]
        data = np.squeeze(data) if self.squeeze else data
        return data

    def __setitem__(self, idx, value):
        """
      netcdf2d["{groupname}","{variablename}",{...indices...}]=np.array()
    """
        group, idx = self._item_(idx)
        group[idx] = value

    def query(self, obj, return_dimensions=False, return_indices=False):
        """
      Get data using obj instead of using __getitem__
      This function will search the obj using keys such ash "group","variable" and name of dimensions (e.g. "x","time")
      If "group" does not exist, it will find the "group" based on the name of the variable and with the least amount of partition (e.g "s","t")
    """
        dimensions = self.dimensions

        obj = parseObj(obj, dimensions)

        vname = obj['variable']
        gname = obj['group']

        if gname is None:
            # No explicit group: consider every group containing the variable.
            groups = self.getGroupsByVariable(vname)

            if len(groups) > 1:
                # Prefer groups whose dimensions intersect the queried dims.
                dims = obj.pop('dims')
                _groups = list(
                    filter(
                        lambda x: any(dim in dims
                                      for dim in self.groups[x].dimensions),
                        groups))
                if (len(_groups) > 0):
                    groups = _groups

            # Among the candidates, pick the group that needs the fewest
            # partition files for this query.
            gname = min(
                groups,
                key=lambda x: len(self.groups[x].getPartitions(vname, obj)))

        partitions, group, idx, indices = self.groups[gname].getPartitions(
            vname, obj, False)

        if len(partitions) > self.maxPartitions:
            raise Exception(
                "Change group or select smaller query - {} /MaxPartitions is {}"
                .format(len(partitions), self.maxPartitions))

        data = group[(vname, *idx)]
        data = np.squeeze(data) if self.squeeze else data

        if return_dimensions and return_indices:
            return data, group.dimensions, indices
        if return_dimensions: return data, group.dimensions
        if return_indices: return data, indices
        return data
예제 #7
0
def createNetCDF(filePath,
                 folder=os.getcwd(),
                 metadata=None,
                 dimensions=None,
                 variables=None,
                 groups=None,
                 ncSize=1.0):
    """
  Create typical NetCDF file based on set of variables

  Parameters
  ----------
  filePath: str,path
  folder: str,optional.
  metadata:object,optional.
  dimensions:object,optional.
  variables:object,optional.
  groups:object,optional.
  ncSize:float,optional. Max partition size (MB) per group.

  Notes
  -----
  Only applicable for 1 layer of groups
  """
    # Fix: None-sentinel defaults instead of mutable `{}` defaults, which
    # were shared across calls and could leak state between invocations.
    metadata = {} if metadata is None else metadata
    dimensions = {} if dimensions is None else dimensions
    variables = {} if variables is None else variables
    groups = {} if groups is None else groups

    NetCDF.create(filePath, metadata, dimensions, variables)

    with Dataset(filePath, "r+") as netcdf:
        for name in groups:
            group = groups[name]
            if not 'variables' in group:
                raise Exception("Group needs variables")
            if not 'dimensions' in group:
                raise Exception("Group needs dimensions")
            dims = group['dimensions']
            # Fix: use a distinct name; this previously rebound (shadowed)
            # the `variables` parameter inside the loop.
            groupVariables = group['variables']

            # Every variable of a group shares the group's dimensions.
            for vname in groupVariables:
                groupVariables[vname]['dimensions'] = dims

            shape = []

            for d in dims:
                if (d == name):
                    raise Exception(
                        "group can't have the same name of a dimension")
                if not d in netcdf.dimensions:
                    raise Exception("Dimension {} does not exist".format(d))
                value = netcdf.dimensions[d].size
                shape.append(value)

            shape = np.array(shape, dtype="i4")
            # Partition the group's full shape into child files no larger
            # than ncSize MB.
            master, child = getMasterShape(shape,
                                           return_childshape=True,
                                           ncSize=ncSize)

            group = netcdf.createGroup(name)
            group = netcdf[name]

            cdims = {}
            for i, d in enumerate(dims):
                cdims[d] = child[i]
            NetCDF._create(
                group, {
                    'cdims': cdims,
                    'shape': shape,
                    'master': master,
                    'child': child,
                    'dims': dims
                }, {}, groupVariables)
예제 #8
0
def test_createNetCDF():
    """Exercise createNetCDF twice: flat variables only, then one group."""
    folder = "../s3"
    filePath = os.path.join(folder, "test1.nc")

    metadata = {"title": "Mytitle"}
    dimensions = {"npe": 3, "nnode": 100, "ntime": 1000, "nelem": 10}
    variables = {
        "a": {
            "type": "float32",
            "dimensions": ["nnode"],
            "units": "m",
            "standard_name": "",
            "long_name": "",
            "least_significant_digit": 3,
        },
        "lat": {
            "type": "float64",
            "dimensions": ["nnode"],
            "units": "m",
            "standard_name": "",
            "long_name": "",
        },
        "lng": {
            "type": "float64",
            "dimensions": ["nnode"],
            "units": "m",
            "standard_name": "",
            "long_name": "",
        },
        "elem": {
            "type": "int32",
            "dimensions": ["nelem"],
            "units": "m",
            "standard_name": "",
            "long_name": "",
        },
        "time": {
            "type": "float64",
            "dimensions": ["ntime"],
            "units": "hours since 1970-01-01 00:00:00.0",
            "calendar": "gregorian",
            "standard_name": "",
            "long_name": "",
        },
    }
    groupVariables = {
        "u": {
            "type": "float32",
            "units": "m/s",
            "standard_name": "",
            "long_name": "",
        },
    }
    groups = {
        "s": {
            "dimensions": ["ntime", "nnode"],
            "variables": groupVariables,
        },
    }

    # Case 1: flat layout — variables at the root, no groups.
    createNetCDF(filePath,
                 folder=folder,
                 metadata=metadata,
                 dimensions=dimensions,
                 variables=variables,
                 ncSize=1.0)
    with NetCDF(filePath, "r") as nc:
        np.testing.assert_array_equal(nc.obj['metadata'], metadata)
        np.testing.assert_array_equal(nc.obj['dimensions'], dimensions)
        np.testing.assert_array_equal(
            nc.obj['variables']['a'], {
                'dimensions': ['nnode'],
                'type': 'f',
                'least_significant_digit': 3,
                'units': 'm',
                'standard_name': '',
                'long_name': '',
                'ftype': 'f'
            })

    # Case 2: grouped layout — group "s" holding variable "u".
    createNetCDF(filePath,
                 folder=folder,
                 metadata=metadata,
                 dimensions=dimensions,
                 groups=groups,
                 ncSize=1.0)
    with NetCDF(filePath, "r") as nc:
        np.testing.assert_array_equal(nc.obj['metadata'], metadata)
        np.testing.assert_array_equal(nc.obj['dimensions'], dimensions)
        np.testing.assert_array_equal(
            nc.obj['groups']["s"]['variables'], {
                'u': {
                    'dimensions': ['ntime', 'nnode'],
                    'type': 'f',
                    'units': 'm/s',
                    'standard_name': '',
                    'long_name': '',
                    'ftype': 'f'
                }
            })
예제 #9
0
def test_1():
    """Round-trip test: create a NetCDF from `input`, re-open it and verify
    metadata and every variable, in both NetCDF4 and NetCDF3 modes
    (NetCDF3 has no groups, so the 'groups' key is dropped for that run)."""
    folder = "test"

    # Fixture covering scalar/typed metadata, many dtypes ("b", "f4", "B",
    # datetime "M", double "d", char "S1"), scaled storage via "stype",
    # multi-dimensional data, and one nested group.
    input = {
        "metadata": {
            "string": "string",
            "integer": 1,
            "float": 0.1,
            "object": {
                "o1": 1,
                "o2": "a"
            }
        },
        "dimensions": {
            "d1": 8,
            "d2": 256,
            "d3": 32,
            "d4": 512,
            "d5": 5,
            "nchar": 6,
            "d0": 1
        },
        "variables": {
            "a": {
                "type": "b",
                "dimensions": ["d2"],
                "units": "m",
                "standard_name": "A Variable",
                "long_name": "Long A Variable",
                "data": np.arange(-128, 128, dtype="byte")
            },
            # "stype" presumably requests packed on-disk storage with
            # min/max scaling — TODO confirm against NetCDF.create.
            "b": {
                "type": "f4",
                "stype": "u1",
                "dimensions": ["d2"],
                "max": 255,
                "min": 0,
                "data": np.arange(0, 256, dtype="f4")
            },
            "c": {
                "type": "f4",
                "stype": "u1",
                "dimensions": ["d2"],
                "max": 255,
                "min": 0,
                "data": np.arange(0, 256)
            },
            "d": {
                "type": "b",
                "dimensions": ["d1"],
                "data": np.arange(0, 8)
            },
            "e": {
                "type": "f4",
                "stype": "i4",
                "dimensions": ["d4"],
                "data": np.arange(0, 512, dtype="f4")
            },
            "f": {
                "type": "f4",
                "dimensions": ["d1", "d3"],
                "max": 255,
                "min": 0,
                "data": np.arange(0, 256).reshape((8, 32))
            },
            "g": {
                "type": "B",
                "dimensions": ["d1"],
                "data": np.arange(0, 8)
            },
            "h": {
                "type": "B",
                "dimensions": ["d1"],
                "data": np.arange(0, 8)
            },
            # Datetime variable: 8 hourly timestamps.
            "i": {
                "type":
                "M",
                "dimensions": ["d1"],
                "data":
                np.datetime64('2017-01-01') +
                np.arange(8) * np.timedelta64(1, 'h')
            },
            "j": {
                "type": "d",
                "dimensions": ["d2"],
                "data": np.arange(0, 256, dtype="d")
            },
            # Char array: 5 strings padded to 6 chars.
            "k": {
                "type": "S1",
                "dimensions": ["d5", "nchar"],
                "data": np.array(["a", "bc", "def", "ghij a", "b"])
            },
            "l": {
                "type": "f4",
                "stype": "i2",
                "dimensions": ["d1", "d3"],
                "max": 255,
                "min": 0,
                "data": np.arange(0, 256).reshape((8, 32))
            },
            # Stored as double but read back as datetime via "ftype".
            "m": {
                "type":
                "d",
                "ftype":
                "M",
                "dimensions": ["d1"],
                "data":
                np.datetime64('2017-01-01') +
                np.arange(8) * np.timedelta64(1, 'h')
            },
            "n": {
                "type": "b",
                "dimensions": ["d0"],
                "data": np.arange(0, 1)
            },
            # Scalar data broadcast onto a size-1 dimension.
            "o": {
                "type": "b",
                "dimensions": ["d0"],
                "data": 1
            },
        },
        "groups": {
            "g1": {
                "metadata": {
                    "shape": [250000, 250000],
                    "integer": 1,
                    "float": 0.1,
                    "object": {
                        "o1": 1,
                        "o2": "a"
                    }
                },
                "dimensions": {
                    "e1": 256
                },
                "variables": {
                    "a": {
                        "type": "b",
                        "dimensions": ["d2"],
                        "units": "m",
                        "standard_name": "A Variable",
                        "long_name": "Long A Variable",
                        "data": np.arange(-128, 128, dtype="byte")
                    },
                }
            }
        },
    }
    # Run once for NetCDF4 (with groups) and once for NetCDF3 (without).
    for netcdf3 in [False, True]:
        filePath = os.path.join(folder, "test_1.nc")
        obj = copy.deepcopy(input)
        if netcdf3: del obj['groups']
        NetCDF.create(filePath, netcdf3=netcdf3, **obj)
        with NetCDF(filePath, "r") as netcdf:
            np.testing.assert_array_equal(
                netcdf.metadata, {
                    "string": "string",
                    "integer": 1,
                    "float": 0.1,
                    "object": {
                        "o1": 1,
                        "o2": "a"
                    }
                })
            np.testing.assert_array_equal(netcdf['a'][:],
                                          np.arange(-128, 128, dtype="i1"))
            np.testing.assert_array_equal(netcdf['b'][:],
                                          np.arange(0, 256, dtype="f4"))
            np.testing.assert_array_equal(netcdf['c'][:], np.arange(0, 256))
            np.testing.assert_array_equal(netcdf['d'][:], np.arange(0, 8))
            # "e" is packed as i4 on disk, so only approximate equality.
            np.testing.assert_almost_equal(netcdf['e'][:],
                                           np.arange(0, 512, dtype="f4"), 4)
            np.testing.assert_array_equal(netcdf['f'][:],
                                          np.arange(0, 256).reshape((8, 32)))
            np.testing.assert_array_equal(netcdf['g'][:], np.arange(0, 8))
            np.testing.assert_array_equal(netcdf['h'][:], np.arange(0, 8))
            np.testing.assert_array_equal(
                netcdf['i'][:],
                np.datetime64('2017-01-01') +
                np.arange(8) * np.timedelta64(1, 'h'))
            np.testing.assert_array_equal(netcdf['j'][:],
                                          np.arange(0, 256, dtype="d"))
            np.testing.assert_array_equal(
                netcdf['k'][:], np.array(["a", "bc", "def", "ghij a", "b"]))
            np.testing.assert_array_equal(netcdf['l'][:],
                                          np.arange(0, 256).reshape((8, 32)))
            np.testing.assert_array_equal(
                netcdf['m'][:],
                np.datetime64('2017-01-01') +
                np.arange(8) * np.timedelta64(1, 'h'))
            np.testing.assert_array_equal(netcdf['n'][:], np.arange(0, 1))
            np.testing.assert_array_equal(netcdf['o'][:], np.arange(1, 2))
            # Group data only exists in the NetCDF4 run.
            if not netcdf3:
                np.testing.assert_array_equal(netcdf['g1'].metadata['shape'],
                                              np.array([250000, 250000]))
                np.testing.assert_array_equal(netcdf['g1']['a'][:],
                                              np.arange(-128, 128, dtype="i1"))