def read_events_nc(files, no_error=True):
    """
    Read in the nc file produced by the Level 0 reader.

    Clem Tillier of Lockheed Martin has made a L0 reader available that
    will produce events. These events are not geo-navigated.

    Unique IDs are not implemented.

    Parameters
    ----------
    files : str or sequence of str
        The files to read in.
    no_error : bool
        If True, then throw out any events associated with an error.
        Right now, this keyword has no effect - error events are always
        dropped. (In the future, we'll add an option to keep them!)

    """
    import xarray as xr

    files = np.atleast_1d(files)

    data = list()

    for f in files:
        ev = xr.open_dataset(f, decode_times=False)

        if ev.error_count != 0:
            # TODO: Add ability to keep error events
            ev = ev.drop_dims('number_of_errors')

        # There are a few scalar variables we don't keep, as they become
        # columns that all have the same value:
        ev = ev.drop(['event_count', 'error_count', 'spw_dropped_event_count'])

        # Now, we should be able to get to a DataFrame that's sensible...
        ev = ev.to_dataframe()

        # Now, we build the time. It takes some awkward code gymnastics...
        time = (np.datetime64('2000-01-01T12', 'ns')
                + ev.event_day.astype('int64').values.astype('timedelta64[D]')
                + ev.event_millisecond.astype('int64').values.astype('timedelta64[ms]')
                + ev.event_microsecond.astype('int64').values.astype('timedelta64[us]'))

        ev['time'] = time

        data.append(ev)

    data = Ltg(pd.concat(data, ignore_index=True))

    return data
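
# A minimal usage sketch for `read_events_nc` (the file names here are
# hypothetical; any L0-reader output files should do):
#
#     ev = read_events_nc(['glm_l0_events_0001.nc', 'glm_l0_events_0002.nc'])
#     print(ev.time.min(), ev.time.max())
#
# Since these events are not geo-navigated, expect pixel-level fields and
# times rather than lats/lons.
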
def search_path(path, time_range, lon_range=None, lat_range=None):
    """
    Search the given path for GLM data that satisfy the given ranges.

    This provides a way to search an archive of GLM data files for
    lightning in the given time/space bounds. Data is assumed to be in a
    folder structure::

        path/doy/<sub_dirs>/<glm_files>

    Note: The time range can't cross dates (yet).

    Parameters
    ----------
    path : str
        The path to be searched. See above for the assumed directory
        structure under this path. Will be passed to `pathlib.Path`.
    time_range : list-like
        Two element iterable specifying the start/stop time you're
        interested in. The values are passed to `pandas.Timestamp`.
    lon_range : list-like, optional
        Two element iterable providing the longitude range. The default is
        None. If None, all longitudes are considered.
    lat_range : list-like, optional
        Two element iterable providing the latitude range. The default is
        None. If None, all latitudes are considered.

    Returns
    -------
    GLM
        A pyltg.GLM class with the data that satisfies the given ranges.

    """
    time_range = [pd.Timestamp(t) for t in time_range]

    # First, go to the day-of-year folder.
    doy = time_range[0].dayofyear

    search_path = Path(path).joinpath('{:03}'.format(doy))

    all_files = np.array(sorted(search_path.rglob('OR_GLM*')))

    times = np.array(filename2date(all_files))

    # To make sure we don't miss any files, buffer the times a bit.
    # Since GLM files are 20 seconds long, this should be sufficient.
    # We'll make it a little bigger just to be sure...
    BUFFER = pd.Timedelta(30, 's')
    idx = (times >= (time_range[0] - BUFFER)) & (times <= (time_range[1] + BUFFER))

    if not np.count_nonzero(idx):
        print('No files found')
        return None

    # These are the files that should contain our range.
    files = all_files[idx]

    all_g = GLM(files)

    fl_t = all_g.flashes.time
    # Sigh, we can't compare datetime64[ns] and Timestamp directly, and we
    # don't have a quick way to convert an array of datetime64 to Timestamp.
    good_idx = (fl_t >= np.datetime64(time_range[0])) & (fl_t <= np.datetime64(time_range[1]))

    if lat_range is not None:
        fl_lat = all_g.flashes.lat
        lat_idx = (fl_lat >= lat_range[0]) & (fl_lat <= lat_range[1])
        good_idx = good_idx & lat_idx

    if lon_range is not None:
        fl_lon = all_g.flashes.lon
        lon_idx = (fl_lon >= lon_range[0]) & (fl_lon <= lon_range[1])
        good_idx = good_idx & lon_idx

    # Make sure we have _something_:
    if not np.count_nonzero(good_idx):
        print('No data found in provided range')
        return None

    fl = all_g.flashes[good_idx]
    ev, grp = all_g.get_groups(fl.id, combine=True, events=True)

    g = GLM()
    g.flashes = Ltg(fl)
    g.groups = Ltg(grp)
    g.events = Ltg(ev)

    return g
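
# A usage sketch for `search_path`, assuming an archive laid out as
# path/doy/<sub_dirs>/<glm_files> (the path and times are hypothetical):
#
#     g = search_path('/data/glm/2019',
#                     ['2019-05-01 02:00', '2019-05-01 02:10'],
#                     lon_range=[-100, -95], lat_range=[30, 35])
#     if g is not None:
#         print(g.flashes.time.min(), g.flashes.time.max())
#
# Note the day-of-year folder is derived from the start time, which is why
# the time range cannot cross a date boundary (yet).
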
def read_lm_file(files, keepall=False):
    """
    Read a Lockheed Martin netCDF file with events and groups.

    As part of PLT, files were generated using LM's Matlab code that
    contain Level 1 events and groups. (Both L1b and L1 events could be
    present.)

    IDs (event, group) are not guaranteed to be unique if reading
    multiple files.

    Parameters
    ----------
    files : str or sequence of str
        The files to read in.
    keepall : bool
        If True, keep all events in the file, even ones that were
        filtered. Note that only L1b events are geo-navigated.

    """
    from netCDF4 import Dataset

    files = np.atleast_1d(files)

    all_ev = list()
    all_grp = list()

    for f in files:
        nc = Dataset(f)

        # Although a little tedious, we are going to get the fields and
        # put them into a dict, and then into a DataFrame...
        ev = dict()

        ev['lat'] = nc.variables['event_lat'][:]
        ev['lon'] = _convert_lon_360_to_180(nc.variables['event_lon'][:])
        ev['energy'] = nc.variables['event_energy'][:]
        ev['parent_id'] = nc.variables['event_parent_group_id'][:]
        ev['px'] = nc.variables['event_x_LM'][:]
        ev['py'] = nc.variables['event_y_LM'][:]
        ev['intensity'] = nc.variables['event_intensity_LM'][:]
        ev['bg_msb'] = nc.variables['event_bg_msb_LM'][:]
        ev['filter_id'] = nc.variables['event_filter_id_LM'][:]
        ev['time'] = _convert_lm_time(nc.variables['event_time_LM'][:].data)
        # Not using: event_frame_id_LM

        ev = pd.DataFrame(ev, columns=ev.keys())

        if not keepall:
            good_rows = ~ev.lat.isna()
            ev = ev[good_rows]

        all_ev.append(ev)

        # Now, the groups:
        # todo: make sure there are groups...
        grp = dict()

        grp['lat'] = nc.variables['group_lat'][:]
        grp['lon'] = _convert_lon_360_to_180(nc.variables['group_lon'][:])
        grp['energy'] = nc.variables['group_energy'][:]
        # Make sure IDs are unsigned...
        # todo: double check this is OK
        grp['id'] = nc.variables['group_id'][:].astype(np.uint32)
        grp['area'] = nc.variables['group_footprint_LM'][:]
        grp['child_count'] = nc.variables['group_child_count_LM'][:]
        grp['time'] = _convert_lm_time(nc.variables['group_time_LM'][:].data)

        grp = pd.DataFrame(grp, columns=grp.keys())

        all_grp.append(grp)

    all_ev = pd.concat(all_ev, ignore_index=True)
    all_grp = pd.concat(all_grp, ignore_index=True)

    return Ltg(all_ev), Ltg(all_grp)
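
# A usage sketch for `read_lm_file` (the file name is hypothetical). The
# function returns the events and groups as separate Ltg objects:
#
#     lm_events, lm_groups = read_lm_file('plt_lm_l1_example.nc')
#
# With the default `keepall=False`, events that were filtered out (and so
# have no lat/lon) are dropped, leaving only geo-navigated events.
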
def read_lm_ev_mat(files):
    """
    Read a Lockheed Martin mat file (Matlab save file) that contains
    only events.

    As part of PLT, a particular processing chain of L0 data produces
    Matlab mat files with just events. Usually, this is done with no
    filters turned on, so we can navigate all available events (at least,
    the ones on the earth).

    The underlying assumption is that there is a structure named `evOut`
    in the files.

    No attempt is made to ensure unique IDs.

    Parameters
    ----------
    files : str or sequence of str
        The files to read in.

    """
    from scipy.io import loadmat

    files = np.atleast_1d(files)

    ev = list()

    # Go through the files, and get the structure...
    for f in files:
        mat_data = loadmat(f)

        ev_out = mat_data['evOut']

        # We need to do a little work, since loadmat seems to pull some
        # weird nesting into the record array. (For the record, squeeze_me
        # doesn't seem to work.)
        # We'll just extract into a dict, to eventually put into a DataFrame.
        this_ev = dict(time=_convert_lm_time(ev_out['origination_time'][0][0][0]),
                       px=ev_out['x'][0][0][0],
                       py=ev_out['y'][0][0][0],
                       intensity=ev_out['intensity'][0][0][0],
                       bg_msb=ev_out['bg_msb'][0][0][0],
                       filter_id=ev_out['filterID'][0][0][0],
                       energy=ev_out['energy'][0][0][0],
                       lat=ev_out['lat'][0][0][0],
                       lon=_convert_lon_360_to_180(ev_out['lon'][0][0][0]))
        # Note: this ignores the following fields present in evOut:
        # 'device_status', 'consec', 'frame_id', 'df', 'rtep', 'pixel',
        # 'chan', 'ufid', 'isActive', 'time'

        ev.append(pd.DataFrame(this_ev))

    ev = pd.concat(ev, ignore_index=True)

    # Now, we need to add alt so that we can make it an Ltg class
    ev['alt'] = 0.0

    return Ltg(ev)
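
# A usage sketch for `read_lm_ev_mat` (the file name is hypothetical). The
# mat files carry events only, so a single Ltg object comes back:
#
#     lm_ev = read_lm_ev_mat('plt_lm_events_example.mat')
#
# An `alt` column of zeros is added so the events satisfy the Ltg class;
# the pixel coordinates (px, py) are carried through as-is.
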
class GLM():
    """
    Class to handle GLM data.
    """

    def __init__(self, files=None):
        """
        Initialization.

        Parameters
        ----------
        files : str
            The file(s) to be read in.

        """
        self.events = Ltg()
        self.groups = Ltg()
        self.flashes = Ltg()

        if files is not None:
            self.readFile(files)

    def readFile(self, files):
        """
        Read the given file(s).

        Use GLMDataset to (minimally) read in the files, but we're going
        to extract things and put them in certain places.

        Parameters
        ----------
        files : str
            The file(s) to be read in.

        """
        files = np.atleast_1d(files)  # allow scalar input

        events = list()
        groups = list()
        flashes = list()

        ev_id_ctr = 0
        gr_id_ctr = 0
        fl_id_ctr = 0

        for _file in files:
            # Extract the GLM data. Since we'll handle the parent-child
            # relationship, don't do it here.
            this_glm = glmt.GLMDataset(_file, calculate_parent_child=False,
                                       change_energy_units=False)

            # Some GLM files have no data. Check for these cases:
            # todo: do we need to check groups and flashes too?
            if this_glm.dataset.dims['number_of_events'] == 0:
                continue

            this_event = _extract_events(this_glm)
            this_group = _extract_groups(this_glm)
            this_flash = _extract_flashes(this_glm)

            # We're going to modify the IDs a bit, since they can roll over.
            # The flash IDs seem to roll over at 2**16-1, but the group IDs
            # are MUCH weirder. The rollover seems to happen somewhere
            # between 2**29 and 2**30. To get reasonable IDs, we're going to
            # modify these IDs too.

            # First, get a "mapping" from the current IDs to unique values:
            new_flash_id = np.arange(len(this_flash))

            # Now, update the IDs to this new mapping:
            this_flash.id = new_flash_id

            # Update the parent IDs for the groups.
            # Get a dictionary to map the values:
            flash_id_map = dict(zip(this_flash._orig_id.values, new_flash_id))
            new_id = this_group.parent_id.map(flash_id_map.get)
            # Note: mapping is MUCH faster than using the DataFrame.replace method
            this_group.parent_id = new_id

            # Now, do the same thing with groups/events:
            new_group_id = np.arange(len(this_group))
            this_group.id = new_group_id

            group_id_map = dict(zip(this_group._orig_id.values, new_group_id))
            this_event.parent_id = this_event.parent_id.map(group_id_map.get)

            # We'll sort these by id. Makes counting children easier.
            this_event.sort_values('id', inplace=True)
            this_group.sort_values('id', inplace=True)
            this_flash.sort_values('id', inplace=True)

            # Add in an offset to get unique values across files:
            this_event['id'] += ev_id_ctr
            this_group['id'] += gr_id_ctr
            this_flash['id'] += fl_id_ctr

            # Offset the parent IDs for the children too:
            this_event['parent_id'] += gr_id_ctr
            this_group['parent_id'] += fl_id_ctr

            # Next, update the counters:
            ev_id_ctr = this_event['id'].iloc[-1] + 1
            gr_id_ctr = this_group['id'].iloc[-1] + 1
            fl_id_ctr = this_flash['id'].iloc[-1] + 1

            # Count children:
            child_ev = _get_child_count(this_group, this_event)
            this_group['child_count'] = child_ev

            child_gr = _get_child_count(this_flash, this_group)
            this_flash['child_count'] = child_gr

            # todo: add option to not sort by time
            this_event.sort_values('time', inplace=True)
            this_group.sort_values('time', inplace=True)
            this_flash.sort_values('time', inplace=True)

            # Finally, add "this" data:
            events.append(this_event)
            groups.append(this_group)
            flashes.append(this_flash)

        if not events:
            # todo: related to the above todo, do we need to check groups/flashes?
            print('No GLM data found in files. Class will have no data.')
        else:
            # Put these as attributes of the class:
            self.events = Ltg(pd.concat(events))
            self.groups = Ltg(pd.concat(groups))
            self.flashes = Ltg(pd.concat(flashes))

    def get_events(self, group_ids, combine=False):
        """
        Get the child events for a set of groups.

        Parameters
        ----------
        group_ids : array-like
            The IDs of the groups for which you want the events.
        combine : bool
            If True, return a Pandas DataFrame with all events. If False,
            return a list of DataFrames in which each element of the list
            corresponds to the events for each group ID.

        Returns
        -------
        Pandas DataFrame
            By default, a list of DataFrames is returned. To get one
            DataFrame, see `combine`.

        """
        evs = get_children(group_ids, self.events)

        if combine:
            evs = pd.concat(evs, ignore_index=True)

        return evs

    def get_groups(self, flash_ids, combine=False, events=False):
        """
        Get the child groups for a set of flashes.

        Parameters
        ----------
        flash_ids : array-like
            The IDs of the flashes for which you want the groups.
        combine : bool
            If True, return a Pandas DataFrame with all groups. If False,
            return a list of DataFrames in which each element of the list
            corresponds to the groups for each flash ID.
        events : bool
            If True, also get the child events. If `combine` is True, then
            the events will be returned in one DataFrame. If not, get a
            list of DataFrames, one for each group.

        Returns
        -------
        Pandas DataFrame
            By default, a list of DataFrames is returned. To get one
            DataFrame, see `combine`. If `events` is True, then a tuple is
            returned, with (events, groups).

        """
        flash_ids = np.atleast_1d(flash_ids)

        grps = list()
        evs = list()
        for _id in flash_ids:
            this_grps = self.groups[self.groups.parent_id == _id]
            grps.append(this_grps)
            if events:
                evs.append(self.get_events(this_grps.id, combine=True))
            # TODO: We can get all the events at once, if we have the ids as a list

        if combine:
            grps = pd.concat(grps, ignore_index=True)
            if events:
                evs = pd.concat(evs, ignore_index=True)

        if events:
            return evs, grps
        else:
            return grps

    def plot_groups(self, groups=None, do_events=False,
                    ax=None, latlon=True, gridlines=True,
                    group_marker='.', group_color='black',
                    event_color='yellow', fill_events=True,
                    event_centers=False):
        """
        Make a spatial plot of groups.

        The plot is made with lons along the horizontal axis and lats
        along the vertical axis.

        .. warning::
            Right now, event polygons are only approximate. They are
            plotted as polygons with vertices 0.04 degrees from the event
            center. This is fine at GLM nadir, but becomes progressively
            worse as you look toward the edges of the FOV. Future work
            will try to geolocate the event edges.

        Parameters
        ----------
        groups : `Ltg` class or Pandas DataFrame
            The groups to be plotted. If None, plot the active groups.
            Default is None.
        do_events : bool
            If True, then plot the individual child events too. Right now,
            this is done in an approximate manner. The event footprint is
            approximated by drawing a 0.04 degree (roughly 4 km) box
            around the event lat/lon. This roughly matches the GLM pixel
            size at nadir, so event footprints off-nadir will not be
            accurately represented.
        ax : MPL Axes
            If given, the plot will be made in the provided Axes.
        latlon : bool
            If True, make a map using Cartopy. If True and `ax` is given,
            then it is assumed that `ax` is a Cartopy GeoAxes or
            GeoAxesSubplot.
        gridlines : bool
            If True, then gridlines will be added to the plot. Only valid
            if `latlon` is also True.
        group_marker : str
            The MPL marker used when plotting only groups, i.e., when
            `do_events=False`.
        group_color : str
            The MPL color used when plotting only groups, i.e., when
            `do_events=False`.
        event_color : str
            The color scheme used to scale the event colors by the energy.
            Hard coded for now to be the yellow scheme!
        fill_events : bool
            If True, fill the events with a color related to
            `event_color`. If False, just draw an empty polygon.
        event_centers : bool
            If True, plot a marker at the center of each event.

        Returns
        -------
        tuple
            Two element tuple. The first element is the Axes, and the
            second element is a `dict`. Depending on the arguments, you
            could have these keys:

            :groups: MPL Line2D
            :events_poly: List of MPL PolyCollection of event polygons
                (one element for each group)
            :events_pt: MPL Line2D of event centroids
            :gridlines: Cartopy Gridliner

        """
        import cartopy.crs as ccrs

        if groups is None:
            groups = self.groups[self.groups.active]

        if ax is None:
            if latlon:
                fig, ax = plt.subplots(subplot_kw=dict(projection=ccrs.Mercator()))
            else:
                fig, ax = plt.subplots()

        # There doesn't seem to be a "none" for transform, and the plotting
        # calls are similar whether or not we do a map. So, make a dict
        # with transform if we have it; otherwise, leave it empty.
        trans_kw = {}
        if latlon:
            trans_kw['transform'] = ccrs.PlateCarree()

        retVal = dict()  # we'll return a dictionary of plot artists

        # Get the groups:
        if not do_events:
            # just make a scatter plot
            grp_plt = ax.plot(groups.lon, groups.lat, linestyle='None',
                              marker=group_marker, color=group_color,
                              **trans_kw)
            retVal['groups'] = grp_plt[0]
        else:
            events = self.get_events(groups.id, combine=True)

            poly = event_poly(events, colors=event_color,
                              latlon=latlon, fill=fill_events)
            _ = ax.add_collection(poly)

            # If nothing else has been plotted, the x/y limits will be MPL's
            # defaults. In that case, set the x/y limits ourselves.
            # Otherwise, just add the events to the current view:
            if (ax.get_xlim() == (0.0, 1.0)) and (ax.get_ylim() == (0.0, 1.0)):
                ax.autoscale()

            retVal['events_poly'] = poly

            if event_centers:
                ev_plt = ax.plot(events.lon, events.lat, linestyle='None',
                                 marker='.', color='black', markersize=0.5,
                                 **trans_kw)
                retVal['events_pt'] = ev_plt[0]

        if latlon and gridlines:
            gl = ax.gridlines(draw_labels=True, linestyle=':')
            retVal['gridlines'] = gl

        return ax, retVal

    def reset_active(self):
        """
        Reset the active state of the underlying Ltg classes.

        See `pyltg.baseclass.reset_active`.
        """
        self.events.reset_active()
        self.groups.reset_active()
        self.flashes.reset_active()
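
# A short end-to-end sketch of the GLM class (the file name is hypothetical;
# any GOES GLM L2 LCFA file should work):
#
#     g = GLM('OR_GLM-L2-LCFA_G16_example.nc')
#
#     # Pull the child groups and events for every flash, one DataFrame each:
#     evs, grps = g.get_groups(g.flashes.id, combine=True, events=True)
#
#     # Map those groups, drawing the (approximate) event footprints:
#     ax, artists = g.plot_groups(groups=grps, do_events=True)
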