def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True):
    """Summarise every trajectory in a dataframe plus the overall time range.

    :param df: DataFrame with ``trajectory``, ``t``, ``x``, ``y`` and ``z``
        columns. When ``None`` it is obtained from ``self.to_dataframe``.
    :param geometries: when true, build a geometry for each trajectory from
        its (x, y) pairs; when false every trajectory's ``geometry`` is
        ``None``.
    :param clean_cols: forwarded to ``self.to_dataframe`` (only used when
        ``df`` is ``None``).
    :param clean_rows: forwarded to ``self.to_dataframe`` (only used when
        ``df`` is ``None``).
    :return: ``Metadata`` namedtuple with ``min_t``, ``max_t`` and
        ``trajectories``, a dict mapping each trajectory id to a
        ``Trajectory`` namedtuple (``min_z``, ``max_z``, ``min_t``,
        ``max_t``, ``first_loc``, ``geometry``).
    """
    if df is None:
        df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows)

    # Build the record types once rather than once per trajectory.
    trajectory = namedtuple(
        'Trajectory',
        ['min_z', 'max_z', 'min_t', 'max_t', 'first_loc', 'geometry']
    )
    meta = namedtuple('Metadata', ['min_t', 'max_t', 'trajectories'])

    trajectories = {}
    for tid, tgroup in df.groupby('trajectory'):
        tgroup = tgroup.sort_values('t')
        first_row = tgroup.iloc[0]
        first_loc = Point(first_row.x, first_row.y)

        geometry = None
        if geometries:
            # unique_justseen is defined outside this block; presumably the
            # itertools recipe that collapses consecutive repeats.
            coords = list(unique_justseen(zip(tgroup.x, tgroup.y)))
            if len(coords) > 1:
                geometry = LineString(coords)
            elif len(coords) == 1:
                # A single distinct position: represent it as a point.
                # (Previously compared the list itself to 1, which is never
                # true, so this case silently produced no geometry.)
                geometry = first_loc

        trajectories[tid] = trajectory(
            min_z=tgroup.z.min(),
            max_z=tgroup.z.max(),
            min_t=tgroup.t.min(),
            max_t=tgroup.t.max(),
            first_loc=first_loc,
            geometry=geometry
        )

    return meta(
        min_t=df.t.min(),
        max_t=df.t.max(),
        trajectories=trajectories
    )
def trajectory_calculated_metadata(df, axes, geometries=True):
    """Compute per-trajectory and overall z/t extents, with a geometry each.

    :param df: DataFrame holding the columns named by ``axes``.
    :param axes: object whose ``trajectory``, ``t``, ``x``, ``y`` and ``z``
        attributes give the corresponding column names in ``df``.
    :param geometries: when true, each trajectory's geometry is built from
        all of its rows that have both x and y; when false only the first
        row (by time) is used.
    :return: ``trajectories_meta`` with overall ``min_z``/``max_z``/
        ``min_t``/``max_t`` and ``trajectories``, a dict mapping trajectory
        id to ``trajectory_meta``.
    """
    per_trajectory = {}

    for traj_id, rows in df.groupby(axes.trajectory):
        rows = rows.sort_values(axes.t)

        if not geometries:
            # Only the earliest row's position stands in for the trajectory.
            head = rows.iloc[0]
            points = [(head[axes.x], head[axes.y])]
        else:
            # Keep rows where both coordinates are present, then pass the
            # (x, y) pairs through unique_justseen.
            has_position = ~(rows[axes.x].isnull() | rows[axes.y].isnull())
            xs = rows.loc[has_position, axes.x].tolist()
            ys = rows.loc[has_position, axes.y].tolist()
            points = list(unique_justseen(zip(xs, ys)))

        # Several positions form a line, one forms a point, none gives None.
        if len(points) > 1:
            shape = LineString(points)
        elif len(points) == 1:
            shape = Point(points[0])
        else:
            shape = None

        per_trajectory[traj_id] = trajectory_meta(
            min_z=rows[axes.z].min(),
            max_z=rows[axes.z].max(),
            min_t=rows[axes.t].min(),
            max_t=rows[axes.t].max(),
            geometry=shape
        )

    return trajectories_meta(
        min_z=df[axes.z].min(),
        max_z=df[axes.z].max(),
        min_t=df[axes.t].min(),
        max_t=df[axes.t].max(),
        trajectories=per_trajectory
    )
def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True):
    """Summarise every profile in a dataframe plus the overall extents.

    :param df: DataFrame with ``profile``, ``t``, ``x``, ``y`` and ``z``
        columns. When ``None`` it is obtained from ``self.to_dataframe``.
    :param geometries: when true, build one geometry from the (x, y) pairs
        of the whole dataframe; when false ``geometry`` is ``None``.
    :param clean_cols: forwarded to ``self.to_dataframe`` (only used when
        ``df`` is ``None``).
    :param clean_rows: forwarded to ``self.to_dataframe`` (only used when
        ``df`` is ``None``).
    :return: ``Metadata`` namedtuple with ``min_z``, ``max_z``, ``min_t``,
        ``max_t``, ``profiles``, ``first_loc`` and ``geometry``.
        ``profiles`` maps each profile id to a ``Profile`` namedtuple
        (``min_z``, ``max_z``, ``t``, ``x``, ``y``, ``loc``) taken from the
        profile's earliest row.
    """
    if df is None:
        df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows)

    # Build the record types once; previously the Profile class was
    # recreated on every loop iteration, giving each record its own type.
    profile = namedtuple('Profile', ['min_z', 'max_z', 't', 'x', 'y', 'loc'])
    meta = namedtuple('Metadata', [
        'min_z', 'max_z', 'min_t', 'max_t', 'profiles', 'first_loc', 'geometry'
    ])

    profiles = {}
    for pid, pgroup in df.groupby('profile'):
        pgroup = pgroup.sort_values('t')
        first_row = pgroup.iloc[0]
        profiles[pid] = profile(
            min_z=pgroup.z.min(),
            max_z=pgroup.z.max(),
            t=first_row.t,
            x=first_row.x,
            y=first_row.y,
            loc=Point(first_row.x, first_row.y)
        )

    geometry = None
    # The first location is the first row of the dataframe as given
    # (the dataframe itself is not re-sorted here).
    first_row = df.iloc[0]
    first_loc = Point(first_row.x, first_row.y)
    if geometries:
        coords = list(unique_justseen(zip(df.x, df.y)))
        if len(coords) > 1:
            geometry = LineString(coords)
        elif len(coords) == 1:
            # A single distinct position: represent it as a point.
            geometry = first_loc

    return meta(
        min_z=df.z.min(),
        max_z=df.z.max(),
        min_t=df.t.min(),
        max_t=df.t.max(),
        profiles=profiles,
        first_loc=first_loc,
        geometry=geometry
    )
def get_temporal_attributes(df, axes=None):
    """ Use values in a dataframe to set temporal attributes for the eventual netCDF file
    Attribute names come from https://www.nodc.noaa.gov/data/formats/netcdf/v2.0/

    :param df: data (Pandas DataFrame)
    :param axes: passed through ``get_default_axes``; the result's ``t``
        attribute names the time column of ``df``.
    :return: nested dictionary of variable and global attributes
    """
    axes = get_default_axes(axes)
    mint = df[axes.t].min()
    maxt = df[axes.t].max()

    # Differences between successive timestamps (after unique_justseen).
    times = pd.DatetimeIndex(unique_justseen(df[axes.t]))
    dt_index_diff = times[1:] - times[:-1]
    dt_counts = dt_index_diff.value_counts(sort=True)

    if dt_counts.size > 0 and dt_counts.values[0] / (len(times) - 1) > 0.75:
        # The most frequent interval covers more than 75% of all intervals,
        # so report it as the resolution.
        mode_value = dt_counts.index[0]
    else:
        # Calculate a static resolution
        mode_value = ((maxt - mint) / len(times))

    time_format = '%Y-%m-%dT%H:%M:%SZ'
    start = mint.strftime(time_format)
    end = maxt.strftime(time_format)

    return {
        'variables': {
            axes.t: {
                'attributes': {
                    'actual_min': start,
                    'actual_max': end,
                }
            },
        },
        'attributes': {
            'time_coverage_start': start,
            'time_coverage_end': end,
            # Lowercase 's' is the seconds alias; the uppercase 'S' form is
            # deprecated in recent pandas releases.
            'time_coverage_duration': (maxt - mint).round('1s').isoformat(),
            'time_coverage_resolution': mode_value.round('1s').isoformat()
        }
    }
def profile_calculated_metadata(df, axes, geometries=True):
    """Compute per-profile summaries and one geometry for the whole frame.

    :param df: DataFrame holding the columns named by ``axes``.
    :param axes: object whose ``profile``, ``t``, ``x``, ``y`` and ``z``
        attributes give the corresponding column names in ``df``.
    :param geometries: when true, the geometry is built from every row of
        ``df`` that has both x and y; when false it is built from each
        profile's first-row point.
    :return: ``profiles_meta`` with overall ``min_z``/``max_z``/``min_t``/
        ``max_t``, the ``profiles`` dict (profile id to ``profile_meta``)
        and ``geometry``.
    """
    per_profile = {}

    for pid, rows in df.groupby(axes.profile):
        rows = rows.sort_values(axes.t)
        # The earliest row supplies the profile's time and position.
        head = rows.iloc[0]
        per_profile[pid] = profile_meta(
            min_z=rows[axes.z].min(),
            max_z=rows[axes.z].max(),
            t=head[axes.t],
            x=head[axes.x],
            y=head[axes.y],
            id=pid,
            geometry=Point(head[axes.x], head[axes.y])
        )

    if not geometries:
        # Connect the per-profile points rather than every row.
        points = [entry.geometry for entry in per_profile.values()]
    else:
        # Keep rows where both coordinates are present, then pass the
        # (x, y) pairs through unique_justseen.
        has_position = ~(df[axes.x].isnull() | df[axes.y].isnull())
        xs = df.loc[has_position, axes.x].tolist()
        ys = df.loc[has_position, axes.y].tolist()
        points = list(unique_justseen(zip(xs, ys)))

    # Several positions form a line, one forms a point, none gives None.
    if len(points) > 1:
        shape = LineString(points)
    elif len(points) == 1:
        shape = Point(points[0])
    else:
        shape = None

    return profiles_meta(
        min_z=df[axes.z].min(),
        max_z=df[axes.z].max(),
        min_t=df[axes.t].min(),
        max_t=df[axes.t].max(),
        profiles=per_profile,
        geometry=shape
    )