def __getitem__(self, index):
    slice_ = combine_slices(self._slice, fix_slice(index, self.shape))
    scheme, netloc, path, query, fragment = urlsplit(self.url)
    url = urlunsplit((
        scheme, netloc, path + '.dods',
        quote(self.id) + hyperslab(slice_) + '&' + query,
        fragment))

    resp, data = request(url)
    dds, xdrdata = data.split('\nData:\n', 1)
    dataset = DDSParser(dds).parse()
    data = data2 = DapUnpacker(xdrdata, dataset).getvalue()

    # Retrieve the data from any parent structure(s).
    for var in walk(dataset):
        if type(var) in (StructureType, DatasetType):
            data = data[0]
        elif var.id == self.id:
            return data

    # Some old servers return the wrong response. :-/
    # I found a server that would return an array to a request
    # for an array inside a grid (instead of a structure with
    # the array); this will take care of it.
    for var in walk(dataset):
        if type(var) in (StructureType, DatasetType):
            data2 = data2[0]
        elif self.id.endswith(var.id):
            return data2
def __init__(self, url):
    # download DDS/DAS
    scheme, netloc, path, query, fragment = urlsplit(url)

    ddsurl = urlunsplit((scheme, netloc, path + '.dds', query, fragment))
    r = requests.get(ddsurl)
    r.raise_for_status()
    dds = r.text.encode('utf-8')

    dasurl = urlunsplit((scheme, netloc, path + '.das', query, fragment))
    r = requests.get(dasurl)
    r.raise_for_status()
    das = r.text.encode('utf-8')

    # build the dataset from the DDS and add attributes from the DAS
    self.dataset = build_dataset(dds)
    add_attributes(self.dataset, parse_das(das))

    # remove any projection from the url, leaving selections
    projection, selection = parse_ce(query)
    url = urlunsplit((scheme, netloc, path, '&'.join(selection), fragment))

    # now add data proxies
    for var in walk(self.dataset, BaseType):
        var.data = BaseProxy(url, var.id, var.descr)
    for var in walk(self.dataset, SequenceType):
        var.data = SequenceProxy(url, var.id, var.descr)

    # apply projections
    for var in projection:
        target = self.dataset
        while var:
            token, index = var.pop(0)
            target = target[token]
            if index and isinstance(target.data, BaseProxy):
                target.data.slice = fix_slice(index, target.shape)
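# A minimal sketch of the .dds URL construction used above, assuming the
# Python 3 stdlib urllib.parse; the OPeNDAP endpoint is hypothetical.
from urllib.parse import urlsplit, urlunsplit

url = 'http://example.com/data/coads.nc?COADSX'
scheme, netloc, path, query, fragment = urlsplit(url)
ddsurl = urlunsplit((scheme, netloc, path + '.dds', query, fragment))
assert ddsurl == 'http://example.com/data/coads.nc.dds?COADSX'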
def __init__(self, url, application=None, session=None, output_grid=True):
    # download DDS/DAS
    scheme, netloc, path, query, fragment = urlsplit(url)

    ddsurl = urlunsplit((scheme, netloc, path + '.dds', query, fragment))
    r = GET(ddsurl, application, session)
    raise_for_status(r)
    dds = r.text

    dasurl = urlunsplit((scheme, netloc, path + '.das', query, fragment))
    r = GET(dasurl, application, session)
    raise_for_status(r)
    das = r.text

    # build the dataset from the DDS and add attributes from the DAS
    self.dataset = build_dataset(dds)
    add_attributes(self.dataset, parse_das(das))

    # remove any projection from the url, leaving selections
    projection, selection = parse_ce(query)
    url = urlunsplit((scheme, netloc, path, '&'.join(selection), fragment))

    # now add data proxies
    for var in walk(self.dataset, BaseType):
        var.data = BaseProxy(url, var.id, var.dtype, var.shape,
                             application=application, session=session)
    for var in walk(self.dataset, SequenceType):
        template = copy.copy(var)
        var.data = SequenceProxy(url, template,
                                 application=application, session=session)

    # apply projections
    for var in projection:
        target = self.dataset
        while var:
            token, index = var.pop(0)
            target = target[token]
            if isinstance(target, BaseType):
                target.data.slice = fix_slice(index, target.shape)
            elif isinstance(target, GridType):
                index = fix_slice(index, target.array.shape)
                target.array.data.slice = index
                for s, child in zip(index, target.maps):
                    target[child].data.slice = (s,)
            elif isinstance(target, SequenceType):
                target.data.slice = index

    # retrieve only main variable for grid types:
    for var in walk(self.dataset, GridType):
        var.set_output_grid(output_grid)
def dispatch(dataset):
    buf = BytesIO()  # xlsxwriter writes binary data, so we need a bytes buffer
    wb = Workbook(buf, {'constant_memory': True,
                        'in_memory': True,
                        'default_date_format': 'yyyy/mm/dd hh:mm:ss'})
    format_ = wb.add_format(FORMAT)

    # dataset metadata
    ws = wb.add_worksheet('Global attributes')
    write_metadata(ws, dataset, 0, 0, format_)

    # 1D grids
    for grid in [g for g in walk(dataset, GridType) if len(g.shape) == 1]:
        logger.debug('Grid {}'.format(grid.name))
        ws = wb.add_worksheet(grid.name)

        # headers
        ws.write(0, 0, grid.dimensions[0], format_)
        ws.write(0, 1, grid.name, format_)

        # data
        for j, data in enumerate(grid.data):
            for i, value in enumerate(numpy.asarray(data)):
                ws.write(i + 1, 1 - j, value)

        # add var metadata
        write_metadata(ws, grid, 0, 2, format_)

    # sequences
    for seq in walk(dataset, SequenceType):
        logger.debug('Sequence {}'.format(seq.name))
        ws = wb.add_worksheet(seq.name)

        # add header across the first row
        for j, var_ in enumerate(seq.keys()):
            ws.write(0, j, var_, format_)

        # add data in the subsequent rows
        for i, row in enumerate(seq.data):
            for j, value in enumerate(row):
                ws.write(i + 1, j, value)

        # add var metadata in columns to the right of the data
        n = 0
        j = len(seq.keys()) + 1
        for child in seq.children():
            logger.debug("Child {}".format(child.name))
            ws.merge_range(n, j, n, j + 1, child.name, format_)
            n = write_metadata(ws, child, n + 1, j, format_) + 1

    wb.close()
    return buf
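# A minimal sketch of writing an in-memory workbook with xlsxwriter, which
# the dispatch function above appears to use (add_worksheet/merge_range);
# the sheet name and cell values are made up.
from io import BytesIO
from xlsxwriter import Workbook

buf = BytesIO()
wb = Workbook(buf, {'in_memory': True})
ws = wb.add_worksheet('example')
ws.write(0, 0, 'header')
ws.write(1, 0, 42)
wb.close()
xlsx_bytes = buf.getvalue()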
def __iter__(self):
    dataset = self.dataset
    buf = BytesIO()  # savemat needs a binary buffer
    mdict = {dataset.name: {}}

    # Gridded data.
    for grid in walk(dataset, GridType):
        target = mdict[dataset.name][grid.name] = {}

        # Add dimensions.
        for map_ in grid.maps.values():
            target[map_.name] = {
                'data': numpy.asarray(map_, dtype='f'),
            }
            if map_.attributes:
                target[map_.name]['attributes'] = map_.attributes.copy()

        # Add the var.
        target[grid.array.name] = {
            'data': numpy.asarray(grid.array, dtype='f'),
        }
        if grid.array.attributes:
            target[grid.array.name]['attributes'] = \
                grid.array.attributes.copy()

    # Sequences: store each child as a flat array keyed by its id, since
    # savemat requires valid MATLAB names at the top level.
    for seq in walk(dataset, SequenceType):
        for child in seq.children():
            mdict[child.id.replace('.', '_')] = numpy.fromiter(
                child.data, child.dtype)

    savemat(buf, mdict)
    yield buf.getvalue()
def calculate_size(dataset):
    size = 0
    for var in walk(dataset):
        # Pydap can't calculate the size of a dataset with a
        # Sequence since the data is streamed directly.
        if (isinstance(var, SequenceType) or
                (isinstance(var, BaseType) and var.type in [Url, String])):
            return None
        elif isinstance(var, BaseType):
            # account for array size marker
            if var.shape:
                size += 8

            # calculate size
            if var.shape == ():
                vsize = 1
            else:
                vsize = numpy.prod(var.shape)
            if var.type == Byte:
                # bytes are packed one per byte, padded to a 4-byte boundary
                size += vsize + (-vsize % 4)
            else:
                size += vsize * var.type.size

    # account for DDS
    size += len(''.join(dds_dispatch(dataset))) + len('Data:\n')
    return str(size)
def serialize(dataset):
    fix_map_attributes(dataset)
    fig = Figure(figsize=figsize, dpi=dpi)
    fig.figurePatch.set_alpha(0.0)
    ax = fig.add_axes([0.05, 0.05, 0.45, 0.85])
    ax.axesPatch.set_alpha(0.5)

    # Plot requested grids.
    layers = [layer for layer in query.get('LAYERS', '').split(',')
              if layer] or [var.id for var in walk(dataset, GridType)]
    layer = layers[0]
    names = [dataset] + layer.split('.')
    grid = reduce(operator.getitem, names)

    actual_range = self._get_actual_range(grid)
    norm = Normalize(vmin=actual_range[0], vmax=actual_range[1])
    cb = ColorbarBase(ax, cmap=get_cmap(cmap), norm=norm,
                      orientation='vertical')
    for tick in cb.ax.get_yticklabels():
        tick.set_fontsize(14)
        tick.set_color('white')
        #tick.set_fontweight('bold')

    # Save to buffer.
    canvas = FigureCanvas(fig)
    output = StringIO()
    canvas.print_png(output)
    if hasattr(dataset, 'close'):
        dataset.close()
    return [output.getvalue()]
def density(dataset, salinity, temperature, pressure):
    """Calculate in-situ density.

    This function calculates in-situ density from absolute salinity and
    conservative temperature, using the `gsw.rho` function.

    Returns a new sequence with the data.

    """
    # find sequence
    for sequence in walk(dataset, SequenceType):
        break
    else:
        raise ConstraintExpressionError(
            'Function "density" should be used on a Sequence.')

    selection = sequence[salinity.name, temperature.name, pressure.name]
    rows = [tuple(row) for row in selection]
    data = np.rec.fromrecords(
        rows, names=['salinity', 'temperature', 'pressure'])
    rho = gsw.rho(data['salinity'], data['temperature'], data['pressure'])

    out = SequenceType("result")
    out['rho'] = BaseType("rho", units="kg/m**3")
    out.data = np.rec.fromrecords(rho.reshape(-1, 1), names=['rho'])
    return out
def add_attributes(dataset, attributes): """Add attributes from a parsed DAS to a dataset. Returns the dataset with added attributes. """ dataset.attributes['NC_GLOBAL'] = attributes.get('NC_GLOBAL', {}) dataset.attributes['DODS_EXTRA'] = attributes.get('DODS_EXTRA', {}) for var in list(walk(dataset))[::-1]: # attributes can be flat, eg, "foo.bar" : {...} if var.id in attributes: var.attributes.update(attributes.pop(var.id)) # or nested, eg, "foo" : { "bar" : {...} } try: nested = reduce(operator.getitem, [attributes] + var.id.split('.')[:-1]) k = var.id.split('.')[-1] var.attributes.update(nested.pop(k)) except KeyError: pass # add attributes that don't belong to any child for k, v in attributes.items(): dataset.attributes[k] = v return dataset
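# A hedged sketch of the two DAS layouts handled above; the names are
# illustrative only.
das = {
    'NC_GLOBAL': {'history': 'created by example'},
    'foo.bar': {'units': 'm'},              # flat form
    'foo': {'bar': {'long_name': 'Bar'}},   # nested form
}
# add_attributes(dataset, das) merges both forms onto dataset['foo']['bar']
# and copies NC_GLOBAL onto the dataset itself.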
def serialize(dataset):
    buf = StringIO()
    wb = Workbook()

    # dataset metadata
    ws = wb.add_sheet('Global attributes')
    write_metadata(ws, dataset, 0, 0)

    # sequences
    for seq in walk(dataset, SequenceType):
        ws = wb.add_sheet(seq.name)

        # add header
        for j, var_ in enumerate(seq.keys()):
            ws.write(0, j, var_, HEADER)

        # add data
        for i, row in enumerate(seq.data):
            for j, value in enumerate(row):
                ws.write(i + 1, j, value)

        # add var metadata
        n = 0
        j = len(seq.keys()) + 1
        for child in seq.walk():
            ws.write_merge(n, n, j, j + 1, child.name, HEADER)
            n = write_metadata(ws, child, n + 1, j) + 1

    wb.save(buf)
    return [buf.getvalue()]
def calculate_size(dataset):
    """Calculate the size of the response."""
    length = 0

    for var in walk(dataset):
        # Pydap can't calculate the size of sequences since the data is
        # streamed directly from the source. Also, strings are encoded
        # individually, so it's not possible to get their size unless we
        # read everything.
        if (isinstance(var, SequenceType) or
                (isinstance(var, BaseType) and var.data.dtype.char == 'S')):
            return None
        elif isinstance(var, BaseType):
            if var.shape:
                length += 8  # account for array size marker

            size = np.prod(var.shape)
            if var.data.dtype == np.byte:
                length += size + (-size % 4)
            elif var.data.dtype == np.short:
                length += size * 4
            else:
                length += size * var.data.dtype.itemsize

    # account for DDS
    length += len(''.join(dds_dispatch(dataset))) + len('Data:\n')
    return str(length)
def calculate_size(dataset): """Calculate the size of the response. Returns the size in bytes.""" length = 0 for var in walk(dataset): # Pydap can't calculate the size of sequences since the data is # streamed directly from the source. Also, strings are encoded # individually, so it's not possible to get their size unless we read # everything. if (isinstance(var, SequenceType) or (isinstance(var, BaseType) and var.dtype.char in 'SU')): return None elif isinstance(var, BaseType): if var.shape: length += 8 # account for array size marker size = int(np.prod(var.shape)) if var.data.dtype == np.byte: length += size + (-size % 4) elif var.data.dtype == np.short: length += size * 4 else: opendap_size = np.dtype(typemap[var.data.dtype.char]).itemsize length += size * opendap_size # account for DDS length += len(''.join(dds(dataset))) + len(b'Data:\n') return length
def __iter__(self):
    grids = walk(self.dataset, GridType)

    def generate_aaigrid_files(grid):
        '''Generator that yields multiple file names for each layer of
        the grid parameter.

        This function delegates the actual creation of the '.asc' files
        to _grid_array_to_gdal_files(). Files get written to temp space
        on disk (by the delegatee) and then filenames are yielded from
        this generator.
        '''
        logger.debug("In generate_aaigrid_files for grid {}".format(grid))
        missval = find_missval(grid)
        srs = self.srs
        geo_transform = detect_dataset_transform(grid)
        output_fmt = grid.name + '_{i}.asc'
        for file_ in _grid_array_to_gdal_files(grid.array, srs,
                                               geo_transform,
                                               filename_fmt=output_fmt,
                                               missval=missval):
            yield file_

    # Send each of the grids through _grid_array_to_gdal_files
    # which will generate multiple files per grid
    logger.debug("__iter__: creating the file generator "
                 "for grids {}".format(grids))
    file_generator = chain.from_iterable(map(generate_aaigrid_files, grids))
    return ziperator(file_generator)
def add_attributes(dataset, attributes): """ Add attributes from a parsed DAS to a dataset. """ dataset.attributes['NC_GLOBAL'] = attributes.get('NC_GLOBAL', {}) dataset.attributes['DODS_EXTRA'] = attributes.get('DODS_EXTRA', {}) # add attributes that don't belong to any child for k, v in attributes.items(): if k not in dataset: dataset.attributes[k] = v for var in walk(dataset): # attributes can be flat, eg, "foo.bar" : {...} if var.id in attributes: var.attributes.update(attributes[var.id]) # or nested, eg, "foo" : { "bar" : {...} } try: var.attributes.update( reduce(operator.getitem, [attributes] + var.id.split('.'))) except KeyError: pass return dataset
def __iter__(self):
    nc = self.nc

    # Hack to find the variables if they're nested in the tree
    var2id = {}
    for recvar in nc.variables.keys():
        for dstvar in walk(self.dataset, BaseType):
            if recvar == dstvar.name:
                var2id[recvar] = dstvar.id
                continue

    def type_generator(input):
        epoch = datetime(1970, 1, 1)
        # Is this a "scalar" (i.e. a standard python object)? If so, it
        # needs to be a numpy array, or at least have 'dtype' and
        # 'byteswap' attributes.
        for value in input:
            if isinstance(value, (type(None), str, int, float,
                                  bool, datetime)):
                # special case datetimes, since dates aren't supported
                # by NetCDF3
                if type(value) == datetime:
                    since_epoch = (value - epoch).total_seconds()
                    # days since epoch
                    yield np.array(since_epoch / 3600. / 24.,
                                   dtype='float32')
                else:
                    yield np.array(value)
            else:
                yield value

    def nonrecord_input():
        for varname in nc.non_recvars.keys():
            debug("Iterator for %s", varname)
            dst_var = get_var(self.dataset, var2id[varname]).data
            # skip 0-d variables
            if not dst_var.shape:
                continue
            # Make sure that all elements of the list are iterators
            for x in dst_var:
                yield x
        debug("Done with nonrecord input")

    # Create a generator for the record variables
    def record_generator(nc, dst, table):
        debug("record_generator() for dataset %s", dst)
        vars = [iter(get_var(dst, table[varname]))
                for varname in nc.recvars.keys()]
        while True:
            for var in vars:
                try:
                    yield next(var)
                except StopIteration:
                    # under PEP 479, end the generator instead of
                    # re-raising StopIteration
                    return

    more_input = type_generator(record_generator(nc, self.dataset, var2id))

    # Create a single pipeline which includes the non-record and record
    # variables
    pipeline = nc_generator(nc, chain(type_generator(nonrecord_input()),
                                      more_input))

    # Generate the netcdf stream
    for block in pipeline:
        yield block
def dispatch(dataset):
    buf = StringIO()
    wb = Workbook()

    # dataset metadata
    ws = wb.add_sheet("Global attributes")
    write_metadata(ws, dataset, 0, 0)

    # 1D grids
    for grid in [g for g in walk(dataset, GridType) if len(g.shape) == 1]:
        ws = wb.add_sheet(grid.name)

        # headers
        ws.write(0, 0, grid.dimensions[0], HEADER)
        ws.write(0, 1, grid.name, HEADER)

        # data
        for j, data in enumerate(grid.data):
            for i, value in enumerate(numpy.asarray(data)):
                ws.write(i + 1, 1 - j, value)

        # add var metadata
        write_metadata(ws, grid, 0, 2)

    # sequences
    for seq in walk(dataset, SequenceType):
        ws = wb.add_sheet(seq.name)

        # add header
        for j, var_ in enumerate(seq.keys()):
            ws.write(0, j, var_, HEADER)

        # add data
        for i, row in enumerate(seq.data):
            for j, value in enumerate(row):
                ws.write(i + 1, j, value)

        # add var metadata
        n = 0
        j = len(seq.keys()) + 1
        for child in seq.children():
            ws.write_merge(n, n, j, j + 1, child.name, HEADER)
            n = write_metadata(ws, child, n + 1, j) + 1

    wb.save(buf)
    return [buf.getvalue()]
def bounds(dataset, xmin, xmax, ymin, ymax, zmin, zmax, tmin, tmax):
    r"""Bound a sequence in space and time.

    This function is used by GrADS to access Sequences, eg:

        http://server.example.com/dataset.dods?sequence& \
                bounds(0,360,-90,90,500,500,00Z01JAN1970,00Z01JAN1970)

    We assume the dataset has only a single Sequence, which will be
    returned modified in place.

    """
    # find sequence
    for sequence in walk(dataset, SequenceType):
        break  # get first sequence
    else:
        raise ConstraintExpressionError(
            'Function "bounds" should be used on a Sequence.')

    for child in sequence.children():
        if child.attributes.get('axis', '').lower() == 'x':
            if xmin == xmax:
                sequence.data = sequence[child == xmin].data
            else:
                sequence.data = sequence[
                    (child >= xmin) & (child <= xmax)].data
        elif child.attributes.get('axis', '').lower() == 'y':
            if ymin == ymax:
                sequence.data = sequence[child == ymin].data
            else:
                sequence.data = sequence[
                    (child >= ymin) & (child <= ymax)].data
        elif child.attributes.get('axis', '').lower() == 'z':
            if zmin == zmax:
                sequence.data = sequence[child == zmin].data
            else:
                sequence.data = sequence[
                    (child >= zmin) & (child <= zmax)].data
        elif child.attributes.get('axis', '').lower() == 't':
            start = datetime.strptime(tmin, '%HZ%d%b%Y')
            end = datetime.strptime(tmax, '%HZ%d%b%Y')
            units = child.attributes.get('units',
                                         'seconds since 1970-01-01')

            # if start and end are equal, add the step
            if start == end and 'grads_step' in child.attributes:
                dt = parse_step(child.attributes['grads_step'])
                end = start + dt
                tmin = coards.format(start, units)
                tmax = coards.format(end, units)
                sequence.data = sequence[
                    (child >= tmin) & (child < tmax)].data
            else:
                tmin = coards.format(start, units)
                tmax = coards.format(end, units)
                sequence.data = sequence[
                    (child >= tmin) & (child <= tmax)].data

    return sequence
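# The time bounds above are parsed with the GrADS '%HZ%d%b%Y' format; a
# quick standalone check (strptime matches the month name
# case-insensitively):
from datetime import datetime

start = datetime.strptime('00Z01JAN1970', '%HZ%d%b%Y')
assert start == datetime(1970, 1, 1, 0, 0)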
def serialize(dataset):
    fix_map_attributes(dataset)
    grids = [grid for grid in walk(dataset, GridType)
             if is_valid(grid, dataset)]

    # Set global lon/lat ranges.
    try:
        lon_range = self.cache.get_value('lon_range')
    except (KeyError, AttributeError):
        try:
            lon_range = dataset.attributes['NC_GLOBAL']['lon_range']
        except KeyError:
            lon_range = [np.inf, -np.inf]
            for grid in grids:
                lon = np.asarray(get_lon(grid, dataset)[:])
                lon_range[0] = min(lon_range[0], np.min(lon))
                lon_range[1] = max(lon_range[1], np.max(lon))
        if self.cache:
            self.cache.set_value('lon_range', lon_range)
    try:
        lat_range = self.cache.get_value('lat_range')
    except (KeyError, AttributeError):
        try:
            lat_range = dataset.attributes['NC_GLOBAL']['lat_range']
        except KeyError:
            lat_range = [np.inf, -np.inf]
            for grid in grids:
                lat = np.asarray(get_lat(grid, dataset)[:])
                lat_range[0] = min(lat_range[0], np.min(lat))
                lat_range[1] = max(lat_range[1], np.max(lat))
        if self.cache:
            self.cache.set_value('lat_range', lat_range)

    # Remove ``REQUEST=GetCapabilities`` from query string.
    location = construct_url(environ, with_query_string=True)
    base = location.split('REQUEST=')[0].rstrip('?&')

    context = {
        'dataset': dataset,
        'location': base,
        'layers': grids,
        'lon_range': lon_range,
        'lat_range': lat_range,
    }
    # Load the template using the specified renderer, or fall back to the
    # default template since most people won't bother installing and/or
    # creating a capabilities template -- this guarantees that the
    # response will work out of the box.
    try:
        renderer = environ['pydap.renderer']
        template = renderer.loader('capabilities.xml')
    except (KeyError, TemplateNotFound):
        renderer = self.renderer
        template = renderer.loader('capabilities.xml')

    output = renderer.render(template, context, output_format='text/xml')
    if hasattr(dataset, 'close'):
        dataset.close()
    return [output.encode('utf-8')]
def apply_projection(projection, dataset): """ Apply a given projection to a dataset. The function returns a new dataset object, after applying the projection to the original dataset. """ out = DatasetType(name=dataset.name, attributes=dataset.attributes) debug('in apply_projection') for var in projection: debug("Looping over %s", var) target, template = out, dataset while var: name, slice_ = var.pop(0) # FIXME: Need more checks for whether the projection is valid or not candidate = template[name] # apply slice if slice_: debug("Slicing %s with slice %s", name, slice_) if isinstance(candidate, BaseType): candidate.data = candidate[slice_] elif isinstance(candidate, SequenceType): candidate = candidate[slice_[0]] elif isinstance(candidate, GridType): if len(candidate.maps) != len(slice_): raise HTTPBadRequest("Attempt to slice grid with %d maps with slice (%s) of length %d" % (len(candidate.maps), slice_, len(slice_))) candidate = candidate[slice_] # handle structures if isinstance(candidate, StructureType): debug("instance is a StructureType") # add variable to target if name not in target.keys(): if var: # if there are more children to add we need to clear the # candidate so it has only explicitly added children; # also, Grids are degenerated into Structures if isinstance(candidate, GridType): candidate = StructureType(candidate.name, candidate.attributes) candidate._keys = [] target[name] = candidate target, template = target[name], template[name] else: target[name] = candidate # fix sequence data, including only variables that are in the sequence for seq in walk(out, SequenceType): seq.data = get_var(dataset, seq.id)[tuple(seq.keys())].data debug('out of apply_projection()') return out
def serialize(dataset):
    buf = StringIO()
    mdict = {dataset.name: {}}
    target = mdict[dataset.name]

    # Global attributes. We need to skip empty dictionaries since savemat
    # doesn't work with them.
    if dataset.attributes:
        target['attributes'] = dataset.attributes.copy()

    # Gridded data.
    for grid in walk(dataset, GridType):
        target = mdict[dataset.name][grid.name] = {}

        # Add dimensions.
        for map_ in grid.maps.values():
            target[map_.name] = {
                'data': numpy.asarray(map_, dtype='f'),
            }
            if map_.attributes:
                target[map_.name]['attributes'] = map_.attributes.copy()

        # Add the var.
        target[grid.array.name] = {
            'data': numpy.asarray(grid.array, dtype='f'),
        }
        if grid.array.attributes:
            target[grid.array.name]['attributes'] = \
                grid.array.attributes.copy()

    # Sequences.
    for seq in walk(dataset, SequenceType):
        target = mdict[dataset.name][seq.name] = {}

        # Add vars.
        for child in seq.walk():
            target[child.name] = {
                'data': numpy.fromiter(child.data, child.type.typecode),
            }
            if child.attributes:
                target[child.name]['attributes'] = child.attributes.copy()

    savemat(buf, mdict)
    return [buf.getvalue()]
def _set_data(self, data):
    if isinstance(data, SequenceData):
        self._data = data
        for var in walk(self, (BaseType, SequenceType)):
            if var is not self:
                id_ = var.id[len(self.id) + 1:]
                var.data = data[id_]
    else:
        self._data = None
        for i, var in enumerate(self.walk()):
            var.data = numpy.asarray(
                get_row(data, i, self._nesting_level), 'O')
def serialize(dataset):
    fix_map_attributes(dataset)
    fig = Figure(figsize=figsize, dpi=dpi)
    ax = fig.add_axes([0.0, 0.0, 1.0, 1.0])

    # Set transparent background; found through
    # http://sparkplot.org/browser/sparkplot.py.
    if asbool(query.get('TRANSPARENT', 'true')):
        fig.figurePatch.set_alpha(0.0)
        ax.axesPatch.set_alpha(0.0)

    # Plot requested grids (or all if none requested).
    layers = [layer for layer in query.get('LAYERS', '').split(',')
              if layer] or [var.id for var in walk(dataset, GridType)]
    for layer in layers:
        names = [dataset] + layer.split('.')
        grid = reduce(operator.getitem, names)
        if is_valid(grid, dataset):
            self._plot_grid(dataset, grid, time, bbox, (w, h), ax, cmap)

    # Save to buffer.
    ax.axis([bbox[0], bbox[2], bbox[1], bbox[3]])
    ax.axis('off')
    canvas = FigureCanvas(fig)
    output = StringIO()

    # Optionally convert to paletted png
    paletted = asbool(environ.get('pydap.responses.wms.paletted', 'false'))
    if paletted:
        # Read image
        buf, size = canvas.print_to_buffer()
        im = Image.frombuffer('RGBA', size, buf, 'raw', 'RGBA', 0, 1)
        # Find number of colors
        colors = im.getcolors(256)
        # Only convert if the number of colors is less than 256
        if colors is not None:
            ncolors = len(colors)
            # Get alpha band
            alpha = im.split()[-1]
            # Convert to paletted image
            im = im.convert("RGB")
            im = im.convert("P", palette=Image.ADAPTIVE, colors=ncolors)
            # Set all pixel values below ncolors to 1 and the rest to 0
            mask = Image.eval(alpha, lambda a: 255 if a <= 128 else 0)
            # Paste the color of index ncolors and use alpha as a mask
            im.paste(ncolors, mask)
            # Truncate palette to actual size to save space
            im.palette.palette = im.palette.palette[:3 * (ncolors + 1)]
            im.save(output, 'png', optimize=False, transparency=ncolors)
        else:
            canvas.print_png(output)
    else:
        canvas.print_png(output)

    if hasattr(dataset, 'close'):
        dataset.close()
    return [output.getvalue()]
def apply_projection(projection, dataset): """Apply a given projection to a dataset. This function builds and returns a new dataset by adding those variables that were requested on the projection. """ out = DatasetType(name=dataset.name, attributes=dataset.attributes) # first collect all the variables for p in projection: target, template = out, dataset for i, (name, slice_) in enumerate(p): candidate = template[name] # add variable to target if isinstance(candidate, StructureType): if name not in target.keys(): if i < len(p) - 1: # if there are more children to add we need to clear # the candidate so it has only explicitly added # children; also, Grids are degenerated into Structures if isinstance(candidate, GridType): candidate = StructureType(candidate.name, candidate.attributes) candidate._keys = [] target[name] = candidate target, template = target[name], template[name] else: target[name] = candidate # fix sequence data to include only variables that are in the sequence for seq in walk(out, SequenceType): seq.data = get_var(dataset, seq.id)[tuple(seq.keys())].data # apply slices for p in projection: target = out for name, slice_ in p: target, parent = target[name], target if slice_: if isinstance(target, BaseType): target.data = target[slice_] elif isinstance(target, SequenceType): parent[name] = target[slice_[0]] elif isinstance(target, GridType): parent[name] = target[slice_] else: raise ConstraintExpressionError("Invalid projection!") return out
def wrap_arrayterator(dataset, size): """ Wrap `BaseType` objects in an Arrayterator. Since the buffer size of the Arrayterator is in elements, not bytes, we convert according to the data item size. """ for var in walk(dataset, BaseType): elements = size // var.data.dtype.itemsize var.data = Arrayterator(var.data, elements) return dataset
def apply_selection(selection, dataset):
    """Apply a given selection to a dataset, modifying it inplace."""
    for seq in walk(dataset, SequenceType):
        # apply only relevant selections
        conditions = [
            condition for condition in selection
            if re.match(r'%s\.[^\.]+(<=|<|>=|>|=|!=)' % re.escape(seq.id),
                        condition)]
        for condition in conditions:
            id1, op, id2 = parse_selection(condition, dataset)
            seq.data = seq[op(id1, id2)].data
    return dataset
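# A hedged sketch of the selection strings the regex above matches; the
# sequence and child names ('cast', 'temp', 'salt') are hypothetical.
selection = ['cast.temp>10', 'cast.salt<=35']
# apply_selection(selection, dataset) filters the rows of dataset['cast'],
# keeping those where temp > 10 and salt <= 35; conditions that do not
# start with the sequence id are ignored.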
def apply_projection(projection, dataset): """ Apply a given projection to a dataset. The function returns a new dataset object, after applying the projection to the original dataset. """ out = DatasetType(name=dataset.name, attributes=dataset.attributes) for var in projection: target, template = out, dataset while var: name, slice_ = var.pop(0) candidate = template[name] # apply slice if slice_: if isinstance(candidate, BaseType): candidate.data = candidate[slice_] elif isinstance(candidate, SequenceType): candidate = candidate[slice_[0]] elif isinstance(candidate, GridType): candidate = candidate[slice_] # handle structures if isinstance(candidate, StructureType): # add variable to target if name not in target.keys(): if var: # if there are more children to add we need to clear the # candidate so it has only explicitly added children; # also, Grids are degenerated into Structures if isinstance(candidate, GridType): candidate = StructureType(candidate.name, candidate.attributes) candidate._keys = [] target[name] = candidate target, template = target[name], template[name] else: target[name] = candidate # fix sequence data, including only variables that are in the sequence for seq in walk(out, SequenceType): seq.data = get_var(dataset, seq.id)[tuple(seq.keys())].data return out
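# The projection argument above is a list of variable paths, each path a
# list of (name, slice) tuples as produced by the constraint-expression
# parser; a hypothetical sketch for the constraint 'SST[0:1:5].TIME':
projection = [[('SST', (slice(0, 6),)), ('TIME', ())]]
# apply_projection(projection, dataset) would copy only SST.TIME into the
# returned dataset, applying the slice along the way.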
def __call__(self, environ, start_response): req = Request(environ) if "." in req.path_info: basename, response = req.path_info.rsplit(".", 1) else: basename, response = req.path_info, None # cache a local copy if response in self.responses: url = urljoin(self.url, basename) dataset = open_url(url) cachepath = os.path.join(self.cachedir, basename.replace("/", "_")) # here we an use mstat to check the mtime of the file, and # do a HEAD on the dataset to compare with Last-Modified header r = requests.head(url + ".dods") if "last-modified" in r.headers: last_modified = time.mktime(parsedate(r.headers["last-modified"])) mtime = time.mktime(time.localtime(os.stat(cachepath)[ST_MTIME])) if last_modified > mtime: os.unlink(cachepath) # replace data with a caching version for var in walk(dataset, BaseType): var.data = CachingArrayProxy( cachepath, self.tilesize, self.maxsize, var.type, var.id, var.data.url, var.data.shape, var.data._slice, ) # for var in walk(dataset, SequenceType): # var.data = CachingSequenceProxy( # cachepath, # var.id, # var.data.url, var.data.slice, var.data._slice, var.data.children) app = SimpleHandler(dataset) # pass this upstream else: app = Proxy(self.url) return app(environ, start_response)
def __iter__(self):
    dataset = self.dataset
    buf = BytesIO()  # openpyxl saves binary data
    wb = Workbook()

    for seq in walk(dataset, SequenceType):
        ws = wb.create_sheet(seq.id)
        # add data, one sequence row per worksheet row
        for row in seq.data:
            ws.append([value for value in row])

    wb.save(buf)
    yield buf.getvalue()
def __init__(self, dataset):
    if not dataset:
        raise HTTPBadRequest(
            "The ArcASCII Grid (aaigrid) response did not receive the "
            "required dataset parameter")

    # We should always get a DatasetType, and can use pydap.lib.walk to
    # walk through all of the Grids. Materialize the generator so the
    # emptiness check below works.
    self.grids = list(walk(dataset, GridType))
    if not self.grids:
        raise HTTPBadRequest(
            "The ArcASCII Grid (aaigrid) response only supports "
            "GridTypes, yet none are included in the requested dataset: "
            "{}".format(dataset))

    for grid in self.grids:
        l = len(grid.maps)
        if l not in (2, 3):
            raise HTTPBadRequest(
                "The ArcASCII Grid (aaigrid) response only supports Grids "
                "with 2 or 3 dimensions, but one of the requested grids "
                "contains {} dimension{}".format(l, 's' if l > 1 else ''))
        try:
            detect_dataset_transform(grid)
        except Exception as e:
            raise HTTPBadRequest(
                "The ArcASCII Grid (aaigrid) response could not detect "
                "the grid transform for grid {}: {}".format(grid.name,
                                                            str(e)))
def test_body(self):
    """Test the HTML response.

    We use BeautifulSoup to parse the response, and check for some
    elements that should be there.

    """
    res = self.app.get('/.html')
    soup = BeautifulSoup(res.text, "html.parser")

    self.assertEqual(soup.title.string, "Dataset http://localhost/.html")
    self.assertEqual(soup.form["action"], "http://localhost/.html")
    self.assertEqual(soup.form["method"], "POST")

    # check that all variables are present
    ids = [var.id for var in walk(VerySimpleSequence)]
    for h2, id_ in zip(soup.find_all("h2"), ids):
        self.assertEqual(h2.string, id_)
def wrap_arrayterator(dataset, size): """Wrap `BaseType` objects in an Arrayterator. This function is used to optimize access to huge datasets. It returns a new dataset with data wrapped in Arrayterators. This way the data is read in blocks instead of buffering everything in memory. Since the buffer size of the Arrayterator is in elements, not bytes, we convert according to the data item size. """ for var in walk(dataset, BaseType): if (not isinstance(var.data, Arrayterator) and var.data.dtype.itemsize and var.data.shape): elements = size // var.data.dtype.itemsize var.data = Arrayterator(var.data, elements) return dataset
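# Sketch of the bytes-to-elements conversion above: with a hypothetical
# 1 MB buffer over float64 data, each Arrayterator block holds 131072
# values.
import numpy as np

size = 1024 * 1024
elements = size // np.dtype('float64').itemsize
assert elements == 131072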
def get_datafile_variables(url):
    """Get datafile variables in JSON format.

    Keyword arguments:
    url -- datafile remote url

    """
    dataset = open_url(url)
    attributes = {}
    for child in walk(dataset):
        parts = child.id.split('.')
        if hasattr(child, "dimensions") and len(parts) == 1:
            isVar = False
            item = {}
            if len(child.dimensions) == 1:
                if (child.dimensions[0] != child.id and
                        child.dimensions[0] == 'time'):
                    isVar = True
                    item['dimensions'] = 1
            elif len(child.dimensions) == 3:
                if ('lat' in child.dimensions and
                        'lon' in child.dimensions and
                        'time' in child.dimensions):
                    isVar = True
                    item['dimensions'] = 3
            if isVar:
                # Generates a name for the variable. Uses its long name
                # if possible, otherwise uses the id.
                if ('long_name' in child.attributes and
                        child.attributes['long_name'] != ""):
                    item['name'] = child.attributes['long_name']
                else:
                    item['name'] = child.id
                attributes[child.id] = item

    if hasattr(dataset, 'close'):
        dataset.close()

    out = json.dumps(attributes)
    return out
def add_attributes(dataset, attributes): """ Add attributes from a parsed DAS to a dataset. """ dataset.attributes['NC_GLOBAL'] = attributes.get('NC_GLOBAL', {}) dataset.attributes['DODS_EXTRA'] = attributes.get('DODS_EXTRA', {}) for var in walk(dataset): # attributes can be flat, eg, "foo.bar" : {...} if var.id in attributes: var.attributes.update(attributes[var.id]) # or nested, eg, "foo" : { "bar" : {...} } try: var.attributes.update( reduce(operator.getitem, [attributes] + var.id.split('.'))) except KeyError: pass return dataset
def __iter__(self):
    scheme, netloc, path, query, fragment = urlsplit(self.url)
    id_ = ','.join('%s.%s' % (self.id, child)
                   for child in self.children) or self.id
    url = urlunsplit((
        scheme, netloc, path + '.dods',
        quote(id_) + hyperslab(self._slice) + '&' + query,
        fragment))

    resp, data = request(url)
    dds, xdrdata = data.split('\nData:\n', 1)
    dataset = DDSParser(dds).parse()
    dataset.data = DapUnpacker(xdrdata, dataset).getvalue()
    dataset._set_id()

    # Strip any projections from the request id.
    id_ = re.sub(r'\[.*?\]', '', self.id)

    # And return the proper data.
    for var in walk(dataset):
        if var.id == id_:
            data = var.data
            if isinstance(var, SequenceType):
                order = [var.keys().index(k) for k in self.children]
                data = reorder(order, data, var._nesting_level)
            return iter(data)
def open_url(url):
    """Open a given dataset URL, trying different response methods.

    The function checks the stub DDX method, and falls back to the
    DDS+DAS responses. It can be easily extended for other
    representations like JSON.

    The URL should point to the dataset, omitting any response extensions
    like ``.dds``. Username and password can be passed in the URL like::

        http://user:[email protected]:port/path

    They will be transmitted as plaintext if the server supports only
    Basic authentication, so be careful. For Digest authentication this
    is safe.

    The URL can point directly to an Opendap dataset, or it can contain
    any number of constraint expressions (selection/projections)::

        http://example.com/dataset?var1,var2&var3>10

    You can also specify a cache directory, a timeout and a proxy using
    the global variables from ``pydap.lib``::

        >>> import pydap.lib
        >>> pydap.lib.TIMEOUT = 60  # seconds
        >>> pydap.lib.CACHE = '.cache'
        >>> import httplib2
        >>> from pydap.util import socks
        >>> pydap.lib.PROXY = httplib2.ProxyInfo(
        ...     socks.PROXY_TYPE_HTTP, 'localhost', 8000)

    """
    for response in [_ddx, _ddsdas]:
        dataset = response(url)
        if dataset:
            break
    else:
        raise ClientError("Unable to open dataset.")

    # Remove any projections from the url, leaving selections.
    scheme, netloc, path, query, fragment = urlsplit(url)
    projection, selection = parse_qs(query)
    url = urlunsplit((scheme, netloc, path, '&'.join(selection), fragment))

    # Set data to a Proxy object for BaseType and SequenceType. These
    # variables can then be sliced to retrieve the data on-the-fly.
    for var in walk(dataset, BaseType):
        var.data = ArrayProxy(var.id, url, var.shape)
    for var in walk(dataset, SequenceType):
        var.data = SequenceProxy(var.id, url)

    # Set server-side functions.
    dataset.functions = Functions(url)

    # Apply the corresponding slices.
    projection = fix_shn(projection, dataset)
    for var in projection:
        target = dataset
        while var:
            token, slice_ = var.pop(0)
            target = target[token]
            if slice_ and isinstance(target.data, VariableProxy):
                shape = getattr(target, 'shape', (sys.maxsize,))
                target.data._slice = fix_slice(slice_, shape)

    return dataset
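# A hedged client sketch based on the docstring above; the server URL and
# variable name are hypothetical.
# dataset = open_url('http://example.com/dataset')
# sst = dataset['SST']
# block = sst[0:10, 0:10]  # the ArrayProxy fetches only this hyperslab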
def __call__(self, environ, start_response):
    # specify that we want the parsed dataset
    environ['x-wsgiorg.want_parsed_response'] = True
    req = Request(environ)
    original_query = req.query_string
    projection, selection = parse_ce(req.query_string)

    # apply selection without any function calls
    req.query_string = '&'.join(
        expr for expr in selection if not FUNCTION.match(expr))
    res = req.get_response(self.app)

    # ignore DAS requests
    path, response = req.path.rsplit('.', 1)
    if response == 'das':
        return self.app(environ, start_response)

    # get the dataset
    method = getattr(res.app_iter, 'x_wsgiorg_parsed_response', False)
    if not method:
        environ['QUERY_STRING'] = original_query
        return self.app(environ, start_response)
    dataset = method(DatasetType)

    # apply selection containing server-side functions
    selection = (expr for expr in selection if FUNCTION.match(expr))
    for expr in selection:
        if RELOP.search(expr):
            call, op, other = RELOP.split(expr)
            op = {
                '<': operator.lt,
                '>': operator.gt,
                '!=': operator.ne,
                '=': operator.eq,
                '>=': operator.ge,
                '<=': operator.le,
                '=~': lambda a, b: re.match(b, a),
            }[op]
            other = ast.literal_eval(other)
        else:
            call, op, other = expr, operator.eq, 1

        # evaluate the function call
        sequence = eval_function(dataset, call, self.functions)

        # is this an inplace call?
        for var in walk(dataset, SequenceType):
            if sequence is var:
                break
        else:
            # get the data from the resulting variable, and use it to
            # constrain the original dataset
            child = list(sequence.children())[0]
            data = np.fromiter(child.data, child.dtype)
            valid = op(data, other)
            for sequence in walk(dataset, SequenceType):
                sequence.data = np.asarray(list(sequence), 'O')[valid]

    # now apply projection
    if projection:
        projection = fix_shorthand(projection, dataset)
        base = [p for p in projection if not isinstance(p, str)]
        func = [p for p in projection if isinstance(p, str)]

        # apply non-function projection
        out = apply_projection(base, dataset)

        # apply function projection
        for call in func:
            var = eval_function(dataset, call, self.functions)
            for child in walk(var):
                parent = reduce(
                    operator.getitem, [out] + child.id.split('.')[:-1])
                if child.name not in parent.keys():
                    parent[child.name] = child
                break
        dataset = out

    # Return the original response (DDS, DAS, etc.)
    path, response = req.path.rsplit('.', 1)
    res = BaseHandler.responses[response](dataset)
    return res(environ, start_response)
def setUp(self):
    """Build dataset with no shared data."""
    self.dataset = copy.copy(SimpleGrid)
    for var in walk(self.dataset, BaseType):
        var.data = var.data.copy()
def __init__(self, dataset):
    BaseResponse.__init__(self, dataset)

    self.nc = netcdf_file(None)
    if 'NC_GLOBAL' in self.dataset.attributes:
        self.nc._attributes.update(self.dataset.attributes['NC_GLOBAL'])

    dimensions = [var.dimensions for var in walk(self.dataset)
                  if isinstance(var, BaseType)]
    dimensions = set(reduce(lambda x, y: x + y, dimensions))

    try:
        unlim_dim = self.dataset.attributes['DODS_EXTRA'][
            'Unlimited_Dimension']
    except KeyError:
        unlim_dim = None

    # GridType
    for grid in walk(dataset, GridType):
        # add dimensions
        for dim, map_ in grid.maps.items():
            if dim in self.nc.dimensions:
                continue
            n = None if dim == unlim_dim else grid[dim].data.shape[0]
            self.nc.createDimension(dim, n)
            if not n:
                self.nc.set_numrecs(grid[dim].data.shape[0])
            var = grid[dim]

            # and add dimension variable
            self.nc.createVariable(dim, var.dtype.char, (dim,),
                                   attributes=var.attributes)

        # finally add the grid variable itself
        base_var = grid[grid.name]
        var = self.nc.createVariable(base_var.name, base_var.dtype.char,
                                     base_var.dimensions,
                                     attributes=base_var.attributes)

    # Sequence types!
    for seq in walk(dataset, SequenceType):
        self.nc.createDimension(seq.name, None)
        try:
            n = len(seq)
        except TypeError:
            # FIXME: materializing and iterating through a sequence to
            # find the length could have performance problems and could
            # potentially consume the iterable. Do lots of testing here
            # and determine the result of not calling set_numrecs().
            n = len([x for x in seq[list(seq.keys())[0]]])
        self.nc.set_numrecs(n)

        dim = (seq.name,)
        for child in seq.children():
            dtype = child.dtype
            # netcdf does not have a date type, so remap to float
            if dtype == np.dtype('datetime64'):
                dtype = np.dtype('float32')
            elif dtype == np.dtype('object'):
                raise TypeError(
                    "Don't know how to handle numpy type {0}".format(dtype))
            var = self.nc.createVariable(child.name, dtype.char, dim,
                                         attributes=child.attributes)

    self.headers.extend([('Content-type', 'application/x-netcdf')])
    # Optionally set the filesize header if possible
    try:
        self.headers.extend([('Content-length', self.nc.filesize)])
    except ValueError:
        pass