def __init__(self, url, application=None, session=None, output_grid=True):
    # download DDS/DAS
    scheme, netloc, path, query, fragment = urlsplit(url)

    ddsurl = urlunsplit((scheme, netloc, path + '.dds', query, fragment))
    r = GET(ddsurl, application, session)
    raise_for_status(r)
    dds = r.text

    dasurl = urlunsplit((scheme, netloc, path + '.das', query, fragment))
    r = GET(dasurl, application, session)
    raise_for_status(r)
    das = r.text

    # build the dataset from the DDS and add attributes from the DAS
    self.dataset = build_dataset(dds)
    add_attributes(self.dataset, parse_das(das))

    # remove any projection from the url, leaving selections
    projection, selection = parse_ce(query)
    url = urlunsplit((scheme, netloc, path, '&'.join(selection), fragment))

    # now add data proxies
    for var in walk(self.dataset, BaseType):
        var.data = BaseProxy(url, var.id, var.dtype, var.shape,
                             application=application, session=session)
    for var in walk(self.dataset, SequenceType):
        template = copy.copy(var)
        var.data = SequenceProxy(url, template,
                                 application=application, session=session)

    # apply projections
    for var in projection:
        target = self.dataset
        while var:
            token, index = var.pop(0)
            target = target[token]
            if isinstance(target, BaseType):
                target.data.slice = fix_slice(index, target.shape)
            elif isinstance(target, GridType):
                index = fix_slice(index, target.array.shape)
                target.array.data.slice = index
                for s, child in zip(index, target.maps):
                    target[child].data.slice = (s,)
            elif isinstance(target, SequenceType):
                target.data.slice = index

    # retrieve only main variable for grid types:
    for var in walk(self.dataset, GridType):
        var.set_output_grid(output_grid)
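# Hedged usage sketch (not from the source): assuming this __init__ backs
# pydap's open_url entry point, a dataset could be opened and lazily sliced
# like this; the server URL and variable name are illustrative.
from pydap.client import open_url

dataset = open_url('http://example.com/data.nc', output_grid=False)
var = dataset['temperature']       # a BaseType backed by a BaseProxy
subset = var[0, 10:20, 10:20]      # slicing issues a single .dods request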
def __init__(self, url):
    # download DDS/DAS
    scheme, netloc, path, query, fragment = urlsplit(url)

    ddsurl = urlunsplit((scheme, netloc, path + '.dds', query, fragment))
    r = requests.get(ddsurl)
    r.raise_for_status()
    dds = r.text.encode('utf-8')

    dasurl = urlunsplit((scheme, netloc, path + '.das', query, fragment))
    r = requests.get(dasurl)
    r.raise_for_status()
    das = r.text.encode('utf-8')

    # build the dataset from the DDS and add attributes from the DAS
    self.dataset = build_dataset(dds)
    add_attributes(self.dataset, parse_das(das))

    # remove any projection from the url, leaving selections
    projection, selection = parse_ce(query)
    url = urlunsplit((scheme, netloc, path, '&'.join(selection), fragment))

    # now add data proxies
    for var in walk(self.dataset, BaseType):
        var.data = BaseProxy(url, var.id, var.descr)
    for var in walk(self.dataset, SequenceType):
        var.data = SequenceProxy(url, var.id, var.descr)

    # apply projections
    for var in projection:
        target = self.dataset
        while var:
            token, index = var.pop(0)
            target = target[token]
            if index and isinstance(target.data, BaseProxy):
                target.data.slice = fix_slice(index, target.shape)
def __init__(self, url):
    # download DDS/DAS
    scheme, netloc, path, query, fragment = urlsplit(url)
    ddsurl = urlunsplit((scheme, netloc, path + '.dds', query, fragment))
    dds = requests.get(ddsurl).text.encode('utf-8')
    dasurl = urlunsplit((scheme, netloc, path + '.das', query, fragment))
    das = requests.get(dasurl).text.encode('utf-8')

    # build the dataset from the DDS and add attributes from the DAS
    self.dataset = build_dataset(dds)
    add_attributes(self.dataset, parse_das(das))

    # remove any projection from the url, leaving selections
    projection, selection = parse_ce(query)
    url = urlunsplit((scheme, netloc, path, '&'.join(selection), fragment))

    # now add data proxies
    for var in walk(self.dataset, BaseType):
        var.data = BaseProxy(url, var.id, var.descr)
    for var in walk(self.dataset, SequenceType):
        var.data = SequenceProxy(url, var.id, var.descr)

    # apply projections
    for var in projection:
        target = self.dataset
        while var:
            token, index = var.pop(0)
            target = target[token]
            if index and isinstance(target.data, BaseProxy):
                target.data.slice = fix_slice(index, target.shape)
def __getitem__(self, index):
    # build download url
    index = combine_slices(self.slice, fix_slice(index, self.shape))
    scheme, netloc, path, query, fragment = urlsplit(self.baseurl)
    url = urlunsplit((
        scheme, netloc, path + '.dods',
        self.id + hyperslab(index) + '&' + query,
        fragment)).rstrip('&')

    # download and unpack data
    r = requests.get(url)
    dds, data = r.content.split('\nData:\n', 1)

    if self.shape:
        # skip size packing
        if self.dtype.char == 'S':
            data = data[4:]
        else:
            data = data[8:]

    # calculate array size
    shape = tuple((s.stop - s.start) / s.step for s in index)
    size = np.prod(shape)

    if self.dtype == np.byte:
        return np.fromstring(data[:size], 'B')
    elif self.dtype.char == 'S':
        out = []
        for word in range(size):
            n = np.fromstring(data[:4], '>I')  # read length
            data = data[4:]
            out.append(data[:n])
            data = data[n + (-n % 4):]
        return np.array(out, 'S')
    else:
        return np.fromstring(data, self.dtype).reshape(shape)
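# Sketch of how the hyperslab fragment in the URL above is assembled from the
# pydap.lib helpers; the exact string output varies across pydap versions, so
# treat the printed value as illustrative.
from pydap.lib import combine_slices, fix_slice, hyperslab

shape = (12, 90)
base = fix_slice((slice(2, 6),), shape)               # explicit slices per dim
extra = fix_slice((slice(1, 3), slice(None)), shape)  # a later, relative slice
merged = combine_slices(base, extra)                  # slice-of-a-slice, elementwise
print(hyperslab(merged))  # an OPeNDAP index, something like '[3:1:4][0:1:89]'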
def __getitem__(self, index):
    slice_ = combine_slices(self._slice, fix_slice(index, self.shape))
    scheme, netloc, path, query, fragment = urlsplit(self.url)
    url = urlunsplit((
        scheme, netloc, path + '.dods',
        quote(self.id) + hyperslab(slice_) + '&' + query,
        fragment))
    resp, data = request(url)
    dds, xdrdata = data.split('\nData:\n', 1)
    dataset = DDSParser(dds).parse()
    data = data2 = DapUnpacker(xdrdata, dataset).getvalue()

    # Retrieve the data from any parent structure(s).
    for var in walk(dataset):
        if type(var) in (StructureType, DatasetType):
            data = data[0]
        elif var.id == self.id:
            return data

    # Some old servers return the wrong response. :-/
    # I found a server that would return an array to a request
    # for an array inside a grid (instead of a structure with
    # the array); this will take care of it.
    for var in walk(dataset):
        if type(var) in (StructureType, DatasetType):
            data2 = data2[0]
        elif self.id.endswith(var.id):
            return data2
def __getitem__(self, index):
    slice_ = combine_slices(self._slice, fix_slice(index, self.shape))
    scheme, netloc, path, query, fragment = urlsplit(self.url)
    url = urlunsplit((
        scheme, netloc, path + '.dods',
        self.id + hyperslab(slice_) + '&' + query,
        fragment))
    resp, data = request(url)
    dds, xdrdata = data.split('\nData:\n', 1)
    dataset = DDSParser(dds).parse()
    data = data2 = DapUnpacker(xdrdata, dataset).getvalue()

    # Retrieve the data from any parent structure(s).
    for var in walk(dataset):
        if type(var) in (StructureType, DatasetType):
            data = data[0]
        elif var.id == self.id:
            return data

    # Some old servers return the wrong response. :-/
    # I found a server that would return an array to a request
    # for an array inside a grid (instead of a structure with
    # the array); this will take care of it.
    for var in walk(dataset):
        if type(var) in (StructureType, DatasetType):
            data2 = data2[0]
        elif self.id.endswith(var.id):
            return data2
def test_negative_int(self):
    """Negative values are converted to positive."""
    x = np.arange(10)
    slice1 = -5
    slice2 = fix_slice(slice1, x.shape)
    self.assertEqual(slice2, (5,))
    np.testing.assert_array_equal(x[slice1], x[slice2])
def test_negative_stop(self):
    """Test for slices with a negative stop."""
    x = np.arange(10)
    slice1 = slice(2, -2)
    slice2 = fix_slice(slice1, x.shape)
    self.assertEqual(slice2, (slice(2, 8, 1),))
    np.testing.assert_array_equal(x[slice1], x[slice2])
def test_ellipsis(self):
    """Expand Ellipsis to occupy the missing dimensions."""
    x = np.arange(6).reshape(2, 3, 1)
    slice1 = Ellipsis, 0
    slice2 = fix_slice(slice1, x.shape)
    # an Ellipsis is expanded to slice(None)
    self.assertEqual(slice2, (slice(0, 2, 1), slice(0, 3, 1), 0))
    np.testing.assert_array_equal(x[slice1], x[slice2])
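# The tests above pin down fix_slice's contract: any key form (int, slice,
# Ellipsis, tuple) is normalized to a tuple with one entry per dimension and
# negative indices resolved against the shape. A hedged recap, assuming
# fix_slice is importable from pydap.lib:
import numpy as np
from pydap.lib import fix_slice

def equivalent(key, shape):
    x = np.arange(int(np.prod(shape))).reshape(shape)
    np.testing.assert_array_equal(x[key], x[fix_slice(key, shape)])

equivalent(-5, (10,))                 # negative int -> (5,)
equivalent(slice(2, -2), (10,))       # negative stop -> (slice(2, 8, 1),)
equivalent((Ellipsis, 0), (2, 3, 1))  # Ellipsis expanded to full slices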
def __getitem__(self, key):
    if isinstance(key, basestring):
        return StructureType.__getitem__(self, key)
    else:
        key = fix_slice(key, self.shape)
        out = copy.deepcopy(self)
        for var, slice_ in zip(out.walk(), [key] + list(key)):
            var.data = var.data[slice_]
            var.shape = var.data.shape
        return out
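# Hedged illustration of the GridType.__getitem__ above: the zip pairs the main
# array with the full key and each coordinate map with its own component, which
# is what the ``[key] + list(key)`` expression encodes (names hypothetical):
#
#     sub = grid[0, 10:20, 10:20]
#     # sub.array was sliced with (0, 10:20, 10:20); the three maps were
#     # sliced with 0, 10:20 and 10:20 respectively.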
def test_not_tuple(self):
    """Non-tuples should be converted and handled correctly."""
    x = np.arange(10)
    slice1 = 0
    slice2 = fix_slice(slice1, x.shape)
    # ``fix_slice`` will convert to a tuple
    self.assertEqual(slice2, (0,))
    # assert that the slice is equivalent to the original
    np.testing.assert_array_equal(x[slice1], x[slice2])
def __getitem__(self, index):
    # build download url
    index = combine_slices(self.slice, fix_slice(index, self.shape))
    scheme, netloc, path, query, fragment = urlsplit(self.baseurl)
    url = urlunsplit((
        scheme, netloc, path + '.dods',
        quote(self.id) + hyperslab(index) + '&' + query,
        fragment)).rstrip('&')

    # download and unpack data
    logger.info("Fetching URL: %s" % url)
    r = GET(url, self.application, self.session)
    raise_for_status(r)
    dds, data = r.body.split(b'\nData:\n', 1)
    dds = dds.decode(r.content_encoding or 'ascii')

    if self.shape:
        # skip size packing
        if self.dtype.char in 'SU':
            data = data[4:]
        else:
            data = data[8:]

    # calculate array size
    shape = tuple(
        int(np.ceil((s.stop - s.start) / float(s.step))) for s in index)
    size = int(np.prod(shape))

    if self.dtype == np.byte:
        return np.fromstring(data[:size], 'B').reshape(shape)
    elif self.dtype.char in 'SU':
        out = []
        for word in range(size):
            n = np.asscalar(np.fromstring(data[:4], '>I'))  # read length
            data = data[4:]
            out.append(data[:n])
            data = data[n + (-n % 4):]
        return np.array([text_type(x.decode('ascii'))
                         for x in out], 'S').reshape(shape)
    else:
        try:
            return np.fromstring(data, self.dtype).reshape(shape)
        except ValueError as e:
            if str(e) == 'total size of new array must be unchanged':
                # server-side failure.
                # it is expected that the user should be mindful of this:
                raise RuntimeError(
                    ('variable {0} could not be properly '
                     'retrieved. To avoid this '
                     'error consider using open_url(..., '
                     'output_grid=False).').format(quote(self.id)))
            else:
                raise
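# The parsing above relies on the DAP2 ".dods" layout: the DDS text, a
# "Data:" separator, then XDR data in which a non-string array is prefixed by
# its length packed twice as big-endian uint32 (the 8 bytes skipped above).
# A synthetic, hedged example of that layout; this is not a real server reply.
import numpy as np

values = np.arange(5, dtype='>i4')
payload = (b'Dataset { Int32 x[5]; } example;\nData:\n'
           + np.array([5, 5], '>u4').tobytes() + values.tobytes())
dds, data = payload.split(b'\nData:\n', 1)
data = data[8:]                    # skip the doubled length prefix
print(np.frombuffer(data, '>i4'))  # [0 1 2 3 4]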
def __getitem__(self, index): # build download url index = combine_slices(self.slice, fix_slice(index, self.shape)) scheme, netloc, path, query, fragment = urlsplit(self.baseurl) url = urlunsplit( (scheme, netloc, path + ".dods", quote(self.id) + hyperslab(index) + "&" + query, fragment) ).rstrip("&") # download and unpack data logger.info("Fetching URL: %s" % url) r = GET(url, self.application, self.session) raise_for_status(r) dds, data = r.body.split(b"\nData:\n", 1) dds = dds.decode(r.content_encoding or "ascii") if self.shape: # skip size packing if self.dtype.char in "SU": data = data[4:] else: data = data[8:] # calculate array size shape = tuple(int(np.ceil((s.stop - s.start) / float(s.step))) for s in index) size = int(np.prod(shape)) if self.dtype == np.byte: return np.fromstring(data[:size], "B").reshape(shape) elif self.dtype.char in "SU": out = [] for word in range(size): n = np.asscalar(np.fromstring(data[:4], ">I")) # read length data = data[4:] out.append(data[:n]) data = data[n + (-n % 4) :] return np.array([text_type(x.decode("ascii")) for x in out], "S").reshape(shape) else: try: return np.fromstring(data, self.dtype).reshape(shape) except ValueError as e: if str(e) == "total size of new array must be unchanged": # server-side failure. # it is expected that the user should be mindful of this: raise RuntimeError( ( "variable {0} could not be properly " "retrieved. To avoid this " "error consider using open_url(..., " "output_grid=False)." ).format(quote(self.id)) ) else: raise
def __getitem__(self, index): """ Download data for all the tiles containing the request. """ slice_ = combine_slices(self._slice, fix_slice(index, self.shape)) requested = self.parse_request(slice_) with self.lock.readlock: needed = requested & ~self.index[:] # update cache with needed data with self.lock.writelock: for tile in self.get_tiles(needed): self.cache[tile] = super(CachingArrayProxy, self).__getitem__(tile) # update index with newly requested data self.index[:] = self.index[:] | needed return self.cache[slice_]
def __getitem__(self, index):
    # build download url
    index = combine_slices(self.slice, fix_slice(index, self.shape))
    scheme, netloc, path, query, fragment = urlsplit(self.baseurl)
    url = urlunsplit((
        scheme, netloc, path + '.dods',
        quote(self.id) + hyperslab(index) + '&' + query,
        fragment)).rstrip('&')

    # download and unpack data
    logger.info("Fetching URL: %s" % url)
    r = GET(url, self.application, self.session)
    raise_for_status(r)
    dds, data = r.body.split(b'\nData:\n', 1)
    dds = dds.decode(r.content_encoding or 'ascii')

    if self.shape:
        # skip size packing
        if self.dtype.char in 'SU':
            data = data[4:]
        else:
            data = data[8:]

    # calculate array size
    shape = tuple(
        int(np.ceil((s.stop - s.start) / float(s.step))) for s in index)
    size = int(np.prod(shape))

    if self.dtype == np.byte:
        return np.fromstring(data[:size], 'B')
    elif self.dtype.char in 'SU':
        out = []
        for word in range(size):
            n = np.fromstring(data[:4], '>I')  # read length
            data = data[4:]
            out.append(data[:n])
            data = data[n + (-n % 4):]
        return np.array([text_type(x.decode('ascii')) for x in out], 'S')
    else:
        return np.fromstring(data, self.dtype).reshape(shape)
def __getitem__(self, key):
    out = copy.deepcopy(self)
    if isinstance(key, ConstraintExpression):
        scheme, netloc, path, query, fragment = urlsplit(self.url)
        out.url = urlunsplit((
            scheme, netloc, path, str(key & query), fragment))
        if out._slice != (slice(None),):
            warnings.warn(
                'Selection %s will be applied before projection "%s".' %
                (key, hyperslab(out._slice)))
    elif isinstance(key, basestring):
        out._slice = (slice(None),)
        out.children = ()
        parent = self.id
        if ',' in parent:
            parent = parent.split(',', 1)[0].rsplit('.', 1)[0]
        out.id = '%s%s.%s' % (parent, hyperslab(self._slice), key)
    elif isinstance(key, tuple):
        out.children = key[:]
    else:
        out._slice = combine_slices(
            self._slice, fix_slice(key, (sys.maxint,)))
    return out
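# Hedged sketch of the four key types this __getitem__ dispatches on; the
# sequence and child names are illustrative, not from the source:
#
#     seq = dataset['cast']            # a SequenceType backed by this proxy
#     seq['salinity']                  # string key: project a single child
#     seq[('salinity', 'temp')]        # tuple: restrict the children
#     seq[seq['pressure'] > 100]       # ConstraintExpression: server selection
#     seq[10:20]                       # slice: folded into _slice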
def __getitem__(self, index):
    # build download url
    index = combine_slices(self.slice, fix_slice(index, self.shape))
    scheme, netloc, path, query, fragment = urlsplit(self.baseurl)
    url = urlunsplit((
        scheme, netloc, path + '.dods',
        self.id + hyperslab(index) + '&' + query,
        fragment)).rstrip('&')

    # download and unpack data
    r = requests.get(url)
    r.raise_for_status()
    dds, data = r.content.split('\nData:\n', 1)

    if self.shape:
        # skip size packing
        if self.dtype.char == 'S':
            data = data[4:]
        else:
            data = data[8:]

    # calculate array size
    shape = tuple((s.stop - s.start) / s.step for s in index)
    size = np.prod(shape)

    if self.dtype == np.byte:
        return np.fromstring(data[:size], 'B')
    elif self.dtype.char == 'S':
        out = []
        for word in range(size):
            n = np.fromstring(data[:4], '>I')  # read length
            data = data[4:]
            out.append(data[:n])
            data = data[n + (-n % 4):]
        return np.array(out, 'S')
    else:
        return np.fromstring(data, self.dtype).reshape(shape)
def __getitem__(self, index):
    index = fix_slice(index, self.shape)

    # create a new axis
    if self.axis is None:
        # get the slice along the aggregation axis, and leave the rest for
        # the variable itself
        slice_, index = index[0], index[1:]

        data = []
        for file, n in self.count[slice_]:
            f = netcdf_file(file)
            data.append(f.variables[self.name][index])
            f.close()
        return np.array(data).astype(self.dtype)

    # concatenate along an existing axis
    else:
        # convert index to list so we can change it
        index = list(index)

        # get the slice along the aggregation axis and store it in a
        # boolean array that we'll map to the files
        slice_ = index[self.axis]
        indexes = np.zeros(self.shape[self.axis], bool)
        indexes[slice_] = 1

        offset = 0
        data = []
        for file, n in self.count:
            selected_here = indexes[offset:offset + n]
            if any(selected_here):
                index[self.axis] = selected_here
                f = netcdf_file(file)
                data.append(f.variables[self.name][tuple(index)])
                f.close()
            offset += n
        return np.concatenate(data, axis=self.axis).astype(self.dtype)
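# The existing-axis branch above maps a global slice onto per-file selections
# through a boolean mask. A standalone recap of that bookkeeping with plain
# numpy (no netCDF files; three hypothetical files of lengths 4, 3 and 5):
import numpy as np

count = [('a.nc', 4), ('b.nc', 3), ('c.nc', 5)]
indexes = np.zeros(12, bool)
indexes[slice(2, 9)] = True        # global request along the aggregation axis

offset = 0
for file, n in count:
    selected_here = indexes[offset:offset + n]
    if selected_here.any():
        print(file, np.nonzero(selected_here)[0])  # local indices in each file
    offset += n
# prints: a.nc [2 3] / b.nc [0 1 2] / c.nc [0 1]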
def open_url(url):
    """
    Open a given dataset URL, trying different response methods.

    The function checks the stub DDX method, and falls back to the
    DDS+DAS responses. It can be easily extended for other
    representations like JSON.

    The URL should point to the dataset, omitting any response
    extensions like ``.dds``. Username and password can be passed in
    the URL like::

        http://user:password@example.com:port/path

    They will be transmitted as plaintext if the server supports only
    Basic authentication, so be careful. For Digest authentication
    this is safe.

    The URL can point directly to an Opendap dataset, or it can
    contain any number of constraint expressions
    (selections/projections)::

        http://example.com/dataset?var1,var2&var3>10

    You can also specify a cache directory, a timeout and a proxy
    using the global variables from ``pydap.lib``::

        >>> import pydap.lib
        >>> pydap.lib.TIMEOUT = 60  # seconds
        >>> pydap.lib.CACHE = '.cache'
        >>> import httplib2
        >>> from pydap.util import socks
        >>> pydap.lib.PROXY = httplib2.ProxyInfo(
        ...     socks.PROXY_TYPE_HTTP, 'localhost', 8000)

    """
    for response in [_ddx, _ddsdas]:
        dataset = response(url)
        if dataset:
            break
    else:
        raise ClientError("Unable to open dataset.")

    # Remove any projections from the url, leaving selections.
    scheme, netloc, path, query, fragment = urlsplit(url)
    projection, selection = parse_qs(query)
    url = urlunsplit((scheme, netloc, path, '&'.join(selection), fragment))

    # Set data to a Proxy object for BaseType and SequenceType. These
    # variables can then be sliced to retrieve the data on-the-fly.
    for var in walk(dataset, BaseType):
        var.data = ArrayProxy(var.id, url, var.shape)
    for var in walk(dataset, SequenceType):
        var.data = SequenceProxy(var.id, url)

    # Set server-side functions.
    dataset.functions = Functions(url)

    # Apply the corresponding slices.
    projection = fix_shn(projection, dataset)
    for var in projection:
        target = dataset
        while var:
            token, slice_ = var.pop(0)
            target = target[token]
            if slice_ and isinstance(target.data, VariableProxy):
                shape = getattr(target, 'shape', (sys.maxint,))
                target.data._slice = fix_slice(slice_, shape)

    return dataset
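# Hedged usage of this legacy client (pydap 2.x-era API; the URL and variable
# are illustrative): the proxies installed above resolve data only when sliced.
#
#     dataset = open_url('http://example.com/dataset?SST[0][10:20][10:20]')
#     data = dataset.SST.array[:]    # fetched on demand through ArrayProxy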