def nc_attributes(self):
    """Return the parent's attributes updated with trajectory entries.

    The mapping returned by the parent class's ``nc_attributes()`` is
    combined, via ``dict_update``, with entries for the ``global``,
    ``trajectory`` and ``distance`` keys defined below.
    """
    trajectory_entries = {
        'global' : {
            'featureType': 'trajectory',
            'cdm_data_type': 'Trajectory'
        },
        'trajectory' : {
            'cf_role': 'trajectory',
            'long_name' : 'trajectory identifier'
        },
        'distance' : {
            'long_name': 'Great circle distance between trajectory points',
            'standard_name': 'distance_between_trajectory_points',
            'units': 'm'
        }
    }
    inherited = super(IncompleteMultidimensionalTrajectory, self).nc_attributes()
    return dict_update(inherited, trajectory_entries)
def from_dataframe(cls, df, output, **kwargs):
    """Write ``df`` to ``output`` using ('trajectory', 'obs') dimensions.

    Rows are grouped by the ``trajectory`` column. Each group becomes one
    row along the ``trajectory`` dimension, and the ``obs`` dimension is
    sized to the largest group. A group with fewer rows than ``obs`` is
    written into the leading slots of its row only; the rest of the row is
    left unwritten.

    Args:
        df: DataFrame with the columns ``trajectory``, ``t``, ``x``, ``y``
            and ``z``, and optionally ``distance``. Every other column is
            written as a ('trajectory', 'obs') variable named
            ``cf_safe_name(column)``.
        output: Passed to ``IncompleteMultidimensionalTrajectory`` both to
            open the dataset for writing (mode ``'w'``) and to build the
            returned object.
        **kwargs: ``attributes`` (optional) is popped and merged over
            ``nc.nc_attributes()`` with ``dict_update``. Whatever remains
            is forwarded to the returned object's constructor.

    Returns:
        ``IncompleteMultidimensionalTrajectory(output, **kwargs)``.
    """
    reserved_columns = ['trajectory', 't', 'x', 'y', 'z', 'distance']
    data_columns = [d for d in df.columns if d not in reserved_columns]
    # Groups carry the same columns as ``df``, so check once up front.
    has_distance = 'distance' in df

    with IncompleteMultidimensionalTrajectory(output, 'w') as nc:

        trajectory_group = df.groupby('trajectory')
        max_obs = trajectory_group.size().max()

        unique_trajectories = df.trajectory.unique()
        nc.createDimension('trajectory', unique_trajectories.size)
        nc.createDimension('obs', max_obs)

        # Metadata variables
        nc.createVariable('crs', 'i4')

        trajectory = nc.createVariable('trajectory', get_dtype(df.trajectory), ('trajectory',))

        # Create all of the variables
        time = nc.createVariable('time', 'i4', ('trajectory', 'obs'), fill_value=int(cls.default_fill_value))
        z = nc.createVariable('z', get_dtype(df.z), ('trajectory', 'obs'), fill_value=df.z.dtype.type(cls.default_fill_value))
        latitude = nc.createVariable('latitude', get_dtype(df.y), ('trajectory', 'obs'), fill_value=df.y.dtype.type(cls.default_fill_value))
        longitude = nc.createVariable('longitude', get_dtype(df.x), ('trajectory', 'obs'), fill_value=df.x.dtype.type(cls.default_fill_value))
        if has_distance:
            distance = nc.createVariable('distance', get_dtype(df.distance), ('trajectory', 'obs'), fill_value=df.distance.dtype.type(cls.default_fill_value))

        attributes = dict_update(nc.nc_attributes(), kwargs.pop('attributes', {}))

        for i, (uid, gdf) in enumerate(trajectory_group):
            trajectory[i] = uid

            # A group may hold fewer rows than the 'obs' dimension. Writing
            # to ``[i, :]`` would target the whole row and fail (or
            # broadcast a single value) for such a group, so only the
            # leading ``len(gdf)`` slots are addressed, exactly as the
            # data-column loop below does.
            sl = slice(0, len(gdf))

            # tolist() converts to a python datetime object without timezone
            g = gdf.t.fillna(999999).tolist()   # 999999 is a dummy value
            NaTs = gdf.t.isnull()
            timenums = np.ma.MaskedArray(nc4.date2num(g, units=cls.default_time_unit))
            # Mask the positions that held the dummy value instead of a real time.
            timenums.mask = NaTs
            time[i, sl] = timenums

            latitude[i, sl] = gdf.y.fillna(latitude._FillValue).values
            longitude[i, sl] = gdf.x.fillna(longitude._FillValue).values
            z[i, sl] = gdf.z.fillna(z._FillValue).values
            if has_distance:
                distance[i, sl] = gdf.distance.fillna(distance._FillValue).values

            for c in data_columns:
                # Create variable if it doesn't exist
                var_name = cf_safe_name(c)
                if var_name not in nc.variables:
                    if np.issubdtype(gdf[c].dtype, 'S') or gdf[c].dtype == object:
                        # AttributeError: cannot set _FillValue attribute for VLEN or compound variable
                        v = nc.createVariable(var_name, get_dtype(gdf[c]), ('trajectory', 'obs'))
                    else:
                        v = nc.createVariable(var_name, get_dtype(gdf[c]), ('trajectory', 'obs'), fill_value=gdf[c].dtype.type(cls.default_fill_value))

                    if var_name not in attributes:
                        attributes[var_name] = {}
                    attributes[var_name] = dict_update(attributes[var_name], {
                        'coordinates' : 'time latitude longitude z',
                    })
                else:
                    v = nc.variables[var_name]

                if hasattr(v, '_FillValue'):
                    vvalues = gdf[c].fillna(v._FillValue).values
                else:
                    # Use an empty string... better than nothing!
                    vvalues = gdf[c].fillna('').values

                v[i, slice(0, vvalues.size)] = vvalues

        # Set global attributes
        nc.update_attributes(attributes)

    return IncompleteMultidimensionalTrajectory(output, **kwargs)
def from_dataframe(cls, df, output, **kwargs):
    """Write ``df`` to ``output`` using ('profile', 'z') dimensions.

    Rows are grouped by the ``profile`` column. For each group, the first
    row supplies the per-profile ``time``, ``latitude``, ``longitude`` and
    (when the column is present) ``distance`` values. The ``z`` column and
    every non-reserved column are written along the ``z`` dimension, which
    is sized to the largest group.

    The ``attributes`` keyword, if given, is popped and merged over
    ``nc.nc_attributes()`` with ``dict_update``; the remaining keywords are
    forwarded to the returned ``IncompleteMultidimensionalProfile``.
    """
    reserved = ['trajectory', 'profile', 't', 'x', 'y', 'z', 'distance']
    payload_columns = [name for name in df.columns if name not in reserved]
    per_level = ('profile', 'z')
    per_profile = ('profile',)

    with IncompleteMultidimensionalProfile(output, 'w') as nc:

        grouped = df.groupby('profile')

        nc.createDimension('profile', df.profile.unique().size)
        nc.createDimension('z', grouped.size().max())

        # Metadata variables
        nc.createVariable('crs', 'i4')

        profile_var = nc.createVariable('profile', get_dtype(df.profile), per_profile)

        # One value per profile
        time_var = nc.createVariable('time', 'i4', per_profile)
        lat_var = nc.createVariable('latitude', get_dtype(df.y), per_profile)
        lon_var = nc.createVariable('longitude', get_dtype(df.x), per_profile)
        if 'distance' in df:
            dist_var = nc.createVariable('distance', get_dtype(df.distance), per_profile)
        # One value per profile and level
        z_var = nc.createVariable(
            'z',
            get_dtype(df.z),
            per_level,
            fill_value=df.z.dtype.type(cls.default_fill_value)
        )

        attributes = dict_update(nc.nc_attributes(), kwargs.pop('attributes', {}))

        for row, (profile_id, frame) in enumerate(grouped):
            profile_var[row] = profile_id

            time_var[row] = nc4.date2num(frame.t.iloc[0], units=cls.default_time_unit)
            lat_var[row] = frame.y.iloc[0]
            lon_var[row] = frame.x.iloc[0]
            if 'distance' in frame:
                dist_var[row] = frame.distance.iloc[0]

            depths = frame.z.fillna(z_var._FillValue).values
            z_var[row, 0:depths.size] = depths

            for column in payload_columns:
                var_name = cf_safe_name(column)

                if var_name in nc.variables:
                    var = nc.variables[var_name]
                else:
                    # First time this column is seen: create its variable
                    column_dtype = frame[column].dtype
                    if np.issubdtype(column_dtype, 'S') or column_dtype == object:
                        # AttributeError: cannot set _FillValue attribute for VLEN or compound variable
                        extra = {}
                    else:
                        extra = {'fill_value': column_dtype.type(cls.default_fill_value)}
                    var = nc.createVariable(var_name, get_dtype(frame[column]), per_level, **extra)

                    attributes[var_name] = dict_update(
                        attributes.setdefault(var_name, {}),
                        {'coordinates' : 'time latitude longitude z'}
                    )

                # Use an empty string when there is no fill value... better than nothing!
                filler = var._FillValue if hasattr(var, '_FillValue') else ''
                column_values = frame[column].fillna(filler).values
                var[row, 0:column_values.size] = column_values

        # Set global attributes
        nc.update_attributes(attributes)

    return IncompleteMultidimensionalProfile(output, **kwargs)