def dataset():
    """Return dataset metadata."""
    # get the dataset from the current app
    ds = get_ds()
    logger.info('opened %s', ds)
    if ds is None:
        resp = {"_comment": "no data loaded"}
        return jsonify(resp)
    # this can be a bit slow
    date_nums = ds.variables['date_time'][:]
    # if we have an actual valid_range attribute, use that
    try:
        times = netCDF4.num2date(ds.variables['date_time'].valid_range,
                                 ds.variables['date_time'].units)
    except AttributeError:
        # fall back to the min/max of the date numbers
        times = netCDF4.num2date([date_nums.min(), date_nums.max()],
                                 ds.variables['date_time'].units)
    if times[0].year < 1970:
        times[0] = datetime.datetime(1970, 1, 1)
    resp = {
        "name": ds.filepath(),
        "variables": list(ds.variables.keys()),
        "time_extent": [
            ensure_datetime(times[0]).isoformat(),
            ensure_datetime(times[-1]).isoformat()
        ]
    }
    ds.close()
    return jsonify(resp)
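
# `ensure_datetime` is used above but not defined in this module. A minimal
# sketch of the assumed behavior: `netCDF4.num2date` can return cftime
# objects, and converting them to plain `datetime.datetime` makes
# `.isoformat()` and `jsonify` safe. This is an assumption, not the
# project's actual implementation.
import datetime


def ensure_datetime(t):
    """Return t as a datetime.datetime (sketch, assumed behavior)."""
    if isinstance(t, datetime.datetime):
        return t
    # cftime objects expose the usual date/time components
    return datetime.datetime(t.year, t.month, t.day, t.hour, t.minute,
                             t.second, getattr(t, 'microsecond', 0))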
def load():
    # TODO: validate filename further, otherwise we might load any file
    req_data = request.get_json()
    # the filename string
    filename = req_data.get('filename')
    # the expanded path
    filepath = pathlib.Path(filename).expanduser()
    logger.debug('loading %s (expanded: %s)', filename, filepath)
    resp = {"loaded": False}
    if filepath.suffix != '.nc':
        resp["error"] = "filename does not end in .nc"
        return jsonify(resp)
    if not filepath.exists():
        resp["error"] = "file does not exist"
        return jsonify(resp)
    logger.debug('request data: %s', req_data)
    if req_data.get('copy', False):
        tmp_dir = tempfile.mkdtemp(prefix='sdc-', suffix='-remove')
        # copy the expanded file
        shutil.copy(filepath, tmp_dir)
        # replace filename with the new location
        filename = str(pathlib.Path(tmp_dir) / filepath.name)
    # remember the filename on the app so get_ds() can find it
    # (flask.g would be the usual place, but that did not work)
    current_app.filename = filename
    ds = get_ds()
    resp["loaded"] = True
    resp["filename"] = filename
    ds.close()
    return jsonify(resp)
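
# `get_ds` is referenced throughout this module but defined elsewhere. A
# minimal sketch of the assumed behavior: open the file remembered on the
# current app (or an explicitly passed path) as a netCDF4 Dataset and
# return None when nothing is loaded. This is an assumption, not the
# project's actual implementation.
import pathlib

import netCDF4
from flask import current_app


def get_ds(dataset=None):
    """Return an open netCDF4.Dataset for the loaded file, or None."""
    filename = dataset or getattr(current_app, 'filename', None)
    if filename is None:
        return None
    return netCDF4.Dataset(str(pathlib.Path(filename).expanduser()),
                           mode='r')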
def extent():
    """Return dataset extent."""
    # get the dataset from the current app
    ds = get_ds()
    if ds is None:
        return jsonify({'error': 'data not loaded'})
    # ensure that our array is always masked
    date_time = np.ma.masked_array(ds.variables['date_time'][:])
    t_ini = netCDF4.num2date(np.min(date_time),
                             ds.variables['date_time'].units)
    t_fin = netCDF4.num2date(np.max(date_time),
                             ds.variables['date_time'].units)
    resp = [t_ini.year, t_fin.year]
    ds.close()
    return jsonify(resp)
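
# Example of exercising the endpoints above with Flask's test client. The
# app factory (`create_app`) and the route names are illustrative
# assumptions; adapt them to the actual application setup.
def example_extent():
    app = create_app()  # assumed app factory, not defined in this module
    with app.test_client() as client:
        client.post('/load', json={'filename': '~/data/profiles.nc'})
        resp = client.get('/extent')
        # expected shape of the response: [first_year, last_year]
        print(resp.get_json())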
def get_profiles():
    """Return profiles for the selected points."""
    # read inputs
    cdi_ids_input = request.args.getlist("cdi_ids")
    dataset = request.values.get("dataset")
    ds = get_ds(dataset=dataset)
    if ds is None:
        return jsonify({'error': 'data not loaded'})
    # get the variable that holds the cdi_ids
    cdi_id_var = get_cdi_id_var(ds=ds)
    # create an array with all the cdi_ids
    cdi_ids = netCDF4.chartostring(ds.variables[cdi_id_var][:])
    # look up the index of each requested cdi_id
    # (note: np.argmax returns 0 when a cdi_id is not found)
    idxs = []
    for cdi_id in cdi_ids_input:
        idx = np.argmax(cdi_ids == cdi_id)
        idxs.append(idx)
    # the variables that contain the temperature, salinity and depth values
    var_names = [
        name for name, var in ds.variables.items()
        if name.startswith('var') and '_' not in name
    ]
    # prepare the output
    # TODO: take a look at these hardcoded names. Either make them an input
    # of the function or something more generic.
    titles = [
        "Water temperature", "Water body salinity", "Depth", "cdi_id",
        "lat", "lon"
    ]
    output = [titles]
    for idx in idxs:
        cdi_id = netCDF4.chartostring(ds.variables[cdi_id_var][idx])
        lon = ds.variables['longitude'][idx].item(0)
        lat = ds.variables['latitude'][idx].item(0)
        idx_variables = {}
        for var_name in var_names:
            var = ds.variables[var_name]
            try:
                idx_variables[var.long_name] = var[idx]
            except IndexError:
                logger.exception("failed to index %s with index %s", var,
                                 idx)
        c = np.array(
            list(
                zip(idx_variables["ITS-90 water temperature"],
                    idx_variables["Water body salinity"],
                    idx_variables["Depth"])))
        df = pd.DataFrame(data=c)
        df = df.dropna(how='all')
        # create a list of lists with the values
        ls = df.values.tolist()
        # append the corresponding cdi_id, lat and lon to every row
        # every row: temperature, salinity, depth, cdi_id, lat, lon
        for item in ls:
            item.extend((str(cdi_id), round(lat, 4), round(lon, 4)))
            output.append(item)
    response = {"data": output}
    # simplejson with ignore_nan=True serializes NaN as null
    return simplejson.dumps(response, ignore_nan=True)
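
# `get_cdi_id_var` is defined elsewhere. A minimal sketch of the assumed
# behavior: find the metadata variable that holds the LOCAL_CDI_ID strings.
# The `long_name` value checked here is an assumption based on SeaDataNet
# ODV-style netCDF files; the fallback reuses the hardcoded 'metavar4'
# from dataset_slice() below.
def get_cdi_id_var(ds):
    """Return the name of the variable that holds the cdi ids (sketch)."""
    for name, var in ds.variables.items():
        if getattr(var, 'long_name', '') == 'LOCAL_CDI_ID':
            return name
    return 'metavar4'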
def dataset_slice():
    """Return dataset content: read some variables and return a feature
    collection, based on the data selection.
    """
    # get the selection from the request
    year = int(request.values.get('year', datetime.datetime.now().year))
    depth = int(request.values.get('depth', 0))
    # get the dataset from the current app
    ds = get_ds()
    if ds is None:
        return jsonify({'error': 'data not loaded'})
    # slicing in time!
    t0 = netCDF4.date2num(datetime.datetime(year=year, month=1, day=1),
                          ds.variables['date_time'].units)
    t1 = netCDF4.date2num(datetime.datetime(year=year + 1, month=1, day=1),
                          ds.variables['date_time'].units)
    # ensure that our array is always masked
    date_time = np.ma.masked_array(ds.variables['date_time'][:])
    is_in_date = np.logical_and(date_time >= t0, date_time < t1).data
    t = np.empty(len(date_time[is_in_date]), dtype=object)
    # converting masked and unmasked values separately makes it much faster
    dtf = np.where(date_time[is_in_date].mask == False)
    dtt = np.where(date_time[is_in_date].mask == True)
    t[dtf] = netCDF4.num2date(date_time[is_in_date][dtf],
                              ds.variables['date_time'].units)
    # do we have any masked values?
    if dtt and dtt[0].size:
        t[dtt] = netCDF4.num2date(date_time[is_in_date][dtt],
                                  ds.variables['date_time'].units)
    # TODO: slicing through depth... Hard with this sort of unstructured
    # netCDF.
    # if data['var1'].long_name == "Depth":
    #     depth = None
    # else:
    depth = None
    if 'lat' in ds.variables:
        lat = ds['lat'][is_in_date]
    elif 'latitude' in ds.variables:
        lat = ds['latitude'][is_in_date]
    if 'lon' in ds.variables:
        lon = ds['lon'][is_in_date]
    elif 'longitude' in ds.variables:
        lon = ds['longitude'][is_in_date]
    cdi_id = netCDF4.chartostring(ds.variables['metavar4'][is_in_date])
    coordinates = np.c_[antimeridian_cut(lon), lat].tolist()
    features = []
    for i, (coordinate, cdi_id_i) in enumerate(zip(coordinates, cdi_id)):
        geometry = geojson.Point(coordinate)
        feature = geojson.Feature(id=i,
                                  geometry=geometry,
                                  properties={"cdi_id": cdi_id_i})
        features.append(feature)
    collection = geojson.FeatureCollection(features=features)
    ds.close()
    return jsonify(collection)
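
# `antimeridian_cut` is defined elsewhere. A plausible sketch of the
# assumed behavior: wrap longitudes into [-180, 180) so features near the
# antimeridian are not drawn on the wrong side of the map. This is an
# assumption, not the project's actual implementation.
import numpy as np


def antimeridian_cut(lon):
    """Return longitudes wrapped into the [-180, 180) range (sketch)."""
    lon = np.asanyarray(lon)
    return (lon + 180.0) % 360.0 - 180.0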
def get_profile():
    """Return profile for one cdi_id."""
    cdi_id = str(request.values.get("cdi_id"))
    dataset = request.values.get("dataset")
    ds = get_ds(dataset=dataset)
    if ds is None:
        return jsonify({'error': 'data not loaded'})
    cdi_id_var = get_cdi_id_var(ds)
    cdi_ids = netCDF4.chartostring(ds.variables[cdi_id_var][:])
    # get the first match
    idx = np.argmax(cdi_ids == cdi_id)
    var_names = [
        name for name, var in ds.variables.items()
        if name.startswith('var') and '_' not in name
    ]
    # add the variables to the list
    variables = {}
    for var_name in var_names:
        var = ds.variables[var_name]
        try:
            variables[var.long_name] = var[idx]
        except IndexError:
            logger.exception("failed to index %s with index %s", var, idx)
    df = pd.DataFrame(data=variables)
    # get rid of missing data
    df = df.dropna(how='all')
    # get metadata
    date_nums = ds.variables['date_time'][idx]
    date_units = ds.variables['date_time'].units
    date = netCDF4.num2date(date_nums, date_units)
    records = json.loads(df.to_json(orient='records'))
    lon = ds.variables['longitude'][idx]
    lat = ds.variables['latitude'][idx]
    meta_var_names = [
        name for name, var in ds.variables.items()
        if name.startswith('metavar') and '_' not in name
    ]
    meta_vars = {}
    for var_name in meta_var_names:
        var = ds.variables[var_name]
        if var.dtype == 'S1' and len(var.shape) > 1:
            meta_vars[var.long_name] = str(netCDF4.chartostring(var[idx]))
        else:
            meta_vars[var.long_name] = var[idx]
    ds.close()
    # ensure date time
    date = ensure_datetime(date)
    meta_vars.update({
        "date": date.isoformat(),
        "cdi_id": cdi_id,
        "lon": lon,
        "lat": lat
    })
    response = {"data": records, "meta": meta_vars}
    return jsonify(response)
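
# Example request against the profile endpoint, assuming the app listens on
# localhost:5000 and the route is /profile; the URL, route and cdi_id value
# are illustrative assumptions.
import requests

resp = requests.get('http://localhost:5000/profile',
                    params={'cdi_id': 'some-local-cdi-id'})
profile = resp.json()
# profile['data'] holds one record per depth level;
# profile['meta'] holds date, cdi_id, lon and lat.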