def total_energy_in_L2(files, lon_range=(-102.5, -99.5), lat_range=(31, 35)):
    # Gather up all flashes over West Texas, which is all flashes in the
    # mesoscale domain centered on that location at the time of the sample files
    energy = 0.0
    for sf in files:
        glm = GLMDataset(sf)
        flashes_subset = glm.subset_flashes(lon_range=lon_range,
                                            lat_range=lat_range)
        energy += flashes_subset.event_energy.sum().data
    return energy
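# A minimal usage sketch for total_energy_in_L2. The filename below is the
# sample path referenced elsewhere in this file; substitute your own list of
# GLM L2 LCFA netCDF files.
sample_files = ['OR_GLM-L2-LCFA_G16_s20171161230400_e20171161231000_c20171161231027.nc']
print('Total event energy:', total_energy_in_L2(sample_files))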
def get_test_dataset():
    # filename = '/data/LCFA-production/OR_GLM-L2-LCFA_G16_s20171161230400_e20171161231000_c20171161231027.nc'
    # flash_ids = np.array([6359, 6472, 6666])
    path = get_sample_data_path()
    filename = os.path.join(path, 'FGE_split_merge_GLM.nc')
    flash_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    flash_ids.sort()
    glm = GLMDataset(filename)
    return glm, flash_ids
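# A short usage sketch: open the bundled test file and confirm the expected
# flash IDs are present (assumes the FGE_split_merge_GLM.nc sample file ships
# with glmtools).
glm, flash_ids = get_test_dataset()
assert set(flash_ids).issubset(set(glm.dataset.flash_id.values))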
def ConvToCSV(input, output=None):
    '''input is the netCDF4 file you would like to turn into a csv
    output is the filename you would like to give it (without .csv)
    if output is None, it will use the date of the netCDF4 file
    returns the name of the file'''
    glm = GLMDataset(input)
    df = pd.DataFrame([
        glm.dataset.event_id.values, glm.dataset.event_time_offset.values,
        glm.dataset.event_lat.values, glm.dataset.event_lon.values,
        glm.dataset.event_energy.values
    ]).T
    if output is None:
        output = str(glm.dataset.product_time.values).replace(':', '-').replace(
            '.', '-')
    df.columns = ['EventID', 'EventTime', 'Lat', 'Lon', 'Energy']
    grp = pd.DataFrame([
        glm.dataset.group_id.values, glm.dataset.group_time_offset.values,
        glm.dataset.group_lat.values, glm.dataset.group_lon.values,
        glm.dataset.group_energy.values, glm.dataset.group_area.values
    ]).T
    grp.columns = ['GroupID', 'GroupTime', 'Lat', 'Lon', 'Energy', 'GroupArea']
    ds = (glm.dataset.flash_time_offset_of_last_event.values -
          glm.dataset.flash_time_offset_of_first_event.values)
    flsh = pd.DataFrame([
        glm.dataset.flash_id.values,
        glm.dataset.flash_time_offset_of_first_event.values,
        glm.dataset.flash_lat.values, glm.dataset.flash_lon.values,
        glm.dataset.flash_energy.values, glm.dataset.flash_area.values,
        ds.astype(int)
    ]).T
    flsh.columns = [
        'FlashID', 'FlashTime', 'Lat', 'Lon', 'Energy', 'FlashArea',
        'Duration_ns'
    ]
    f1 = './temp1.csv'
    f2 = './temp2.csv'
    f3 = './temp3.csv'
    files = [f1, f2, f3]
    df.to_csv(f1)
    grp.to_csv(f2)
    flsh.to_csv(f3)
    # Concatenate the three temporary CSVs into the final output file.
    with open('{}.csv'.format(output), 'wb') as wfd:
        for f in files:
            with open(f, 'rb') as fd:
                shutil.copyfileobj(fd, wfd)
    # Prepend a header line with the event, group, and flash counts.
    line_prepender(
        '{}.csv'.format(output),
        '{},{},{}'.format(int(glm.dataset.event_count.values),
                          int(glm.dataset.group_count.values),
                          int(glm.dataset.flash_count.values)))
    for f in files:
        os.remove(f)
    return output + '.csv'
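# Usage sketch for ConvToCSV; the input filename is the sample path referenced
# elsewhere in this file and may need to be replaced with a local file.
csv_name = ConvToCSV('OR_GLM-L2-LCFA_G16_s20171161230400_e20171161231000_c20171161231027.nc')
print('Wrote', csv_name)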
def proc_each_grid(subgrid, start_time=None, end_time=None, GLM_filenames=None):
    """ Process one tile (a subset of a larger grid) of GLM data.

    Arguments:
    subgrid -- tuple of ((xi, yi), kwargs, proc_kwargs, out_kwargs, pads) where
        (xi, yi) -- the subgrid tile index
        kwargs -- passed to GLMGridder.__init__
        proc_kwargs -- passed to GLMGridder.process_flashes
        out_kwargs -- passed to GLMGridder.write_grids
        pads -- (n_x_pad, n_y_pad, x_pad, y_pad) counts and total distances
            of padding added to this subgrid

    Keyword arguments:
    start_time -- datetime object
    end_time -- datetime object
    GLM_filenames -- a list of GLM filenames to process
    """
    subgridij, kwargsij, process_flash_kwargs_ij, out_kwargs_ij, pads = subgrid
    ellipse_rev = process_flash_kwargs_ij.pop('ellipse_rev')
    # Eventually, we want to trim off n_x/y_pad from each side of the grid
    # n_x_pad, n_y_pad, x_pad, y_pad = pads
    log.info("out kwargs are %s", out_kwargs_ij)

    # These should all be independent at this point and can parallelize
    log.info(('gridder kwargs for subgrid {0} are'.format(subgridij), kwargsij))
    if 'clip_events' in process_flash_kwargs_ij:
        gridder = GLMlutGridder(start_time, end_time, **kwargsij)
    else:
        gridder = GLMGridder(start_time, end_time, **kwargsij)

    if 'clip_events' in process_flash_kwargs_ij:
        xedge, yedge = np.meshgrid(gridder.xedge, gridder.yedge)
        mesh = QuadMeshSubset(xedge, yedge, n_neighbors=16 * 10, regular=True)
        # import pickle
        # with open('/data/LCFA-production/L1b/mesh_subset.pickle', 'wb') as f:
        #     pickle.dump(mesh, f)
        process_flash_kwargs_ij['clip_events'] = mesh
        log.debug(("XEDGE", subgridij, xedge.min(), xedge.max(), xedge.shape))
        log.debug(("YEDGE", subgridij, yedge.min(), yedge.max(), yedge.shape))

    saved_first_file_metadata = False
    for filename in GLM_filenames:
        # Could create a cache of GLM objects by filename here.
        log.info("Processing {0}".format(filename))
        log.info(('process flash kwargs for {0} are'.format(subgridij),
                  process_flash_kwargs_ij))
        sys.stdout.flush()
        glm = GLMDataset(filename, ellipse_rev=ellipse_rev)
        # Pre-load the whole dataset, as recommended by the xarray docs.
        # This saves an absurd amount of time (factor of 80ish) in
        # grid.split_events.replicate_and_split_events
        if len(glm.dataset.number_of_events) > 0:
            # xarray 0.12.1 (and others?) throws an error when trying to load
            # data from an empty dimension.
            glm.dataset.load()
            if not saved_first_file_metadata:
                gridder.first_file_attrs = dict(glm.dataset.attrs)
                saved_first_file_metadata = True
            gridder.process_flashes(glm, **process_flash_kwargs_ij)
        else:
            log.info("Skipping {0} - number of events is 0".format(filename))
        glm.dataset.close()
        del glm

    log.info("Done processing all files, preparing to write")
    preprocess_out = out_kwargs_ij.pop('preprocess_out', None)
    if preprocess_out:
        if 'output_kwargs' not in out_kwargs_ij:
            out_kwargs_ij['output_kwargs'] = {}
        # Used by GLMlutGridder.write_grids, but not the others.
        out_kwargs_ij['output_kwargs']['pad'] = preprocess_out.get_pad_slices()
        output = gridder.write_grids(**out_kwargs_ij)
        # Two things can happen here. If the lmatools CF NetCDF writer is used
        # (as it would be when using GLMGridder), then write_all() is the step
        # that actually does the writing, after the
        # lmatools.FlashGridder.write_grids call is intercepted by the output
        # preprocessor. The GLMlutGridder uses the pad slices kwarg, skips the
        # preprocessor, and writes directly; write_all() does nothing in the
        # GLMlutGridder case. It would be better to resolve this inconsistency
        # with a rearchitecture of how the subgrids are handled, avoiding the
        # hacky output preprocessor in all cases.
        outfilenames = preprocess_out.write_all()
    else:
        outfilenames = gridder.write_grids(**out_kwargs_ij)
    return (subgridij, outfilenames)
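# A hedged sketch of one `subgrid` tuple as documented above. Every value is a
# hypothetical placeholder (including ellipse_rev, which proc_kwargs must
# contain because it is popped first); this is not a tested configuration.
example_subgrid = (
    (0, 0),               # (xi, yi) tile index
    {},                   # kwargs passed to GLMGridder.__init__
    {'ellipse_rev': -1},  # proc_kwargs; -1 is a placeholder value
    {},                   # out_kwargs passed to write_grids
    (0, 0, 0.0, 0.0),     # pads: (n_x_pad, n_y_pad, x_pad, y_pad)
)
# proc_each_grid(example_subgrid, start_time=start, end_time=end,
#                GLM_filenames=['GLM_file.nc'])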
import matplotlib.pyplot as plt

def plot_flash(glm, flash_id):
    # NOTE: the original function header was truncated in the source; this
    # reconstruction assumes GLMDataset.get_flashes subsets the dataset to the
    # single flash of interest, and fills in the event/group/flash variables
    # implied by the plotting calls below.
    flash_data = glm.get_flashes([flash_id])
    fl_id = flash_data.flash_id.data
    ev_lats = flash_data.event_lat.data
    ev_lons = flash_data.event_lon.data
    ev_rad = flash_data.event_energy.data
    gr_lat = flash_data.group_lat.data
    gr_lon = flash_data.group_lon.data
    gr_rad = flash_data.group_energy.data
    fl_lat = flash_data.flash_lat.data
    fl_lon = flash_data.flash_lon.data
    fl_rad = flash_data.flash_energy.data
    fl_time = (flash_data.flash_time_offset_of_first_event.data[0],
               flash_data.flash_time_offset_of_last_event.data[0])
    fig = plt.figure()
    ax_ev = fig.add_subplot(111)
    ax_ev.scatter(gr_lon, gr_lat, c=gr_rad, marker='o', s=100,
                  edgecolor='black',
                  cmap='gray_r')  # , vmin=glm.energy_min, vmax=glm.energy_max
    ax_ev.scatter(ev_lons, ev_lats, c=ev_rad, marker='s', s=16,
                  edgecolor='black',
                  cmap='gray')  # , vmin=glm.energy_min, vmax=glm.energy_max
    ax_ev.scatter(fl_lon, fl_lat, c='r', marker='x', s=100)
    ax_ev.set_title('GLM Flash #{0}\nfrom {1}\nto {2}'.format(
        fl_id[0], fl_time[0], fl_time[1]))
    # Prevent offset/scientific notation on the axis tick labels.
    ax_ev.get_xaxis().get_major_formatter().set_useOffset(False)
    ax_ev.get_yaxis().get_major_formatter().set_useOffset(False)
    return fig

if __name__ == '__main__':
    from glmtools.plot.locations import plot_flash
    from glmtools.io.glm import GLMDataset
    glm = GLMDataset('/data/LCFA-production/OR_GLM-L2-LCFA_G16_s20171161230400_e20171161231000_c20171161231027.nc')
    plot_flash(glm, 6666)
def assign_flash_init(glmflidxd):
    # ... earlier body truncated in the source; it computes flash_init_id ...
    # The loop header below is reconstructed: comparing the computed
    # flash_init_id against flash_init_id_test is assumed from the surviving
    # fragment.
    for v in zip(glmflidxd.flash_init_id.data,
                 glmflidxd.flash_init_id_test.data):
        assert (int(v[1] - v[0]) == 0)
    new_glm = glmflidxd.reset_index('number_of_flashes').rename(
        {'number_of_flashes_': 'flash_id'})
    return new_glm

def calculate_flash_init(glm):
    new_glm = glm.entity_groups['flash_id'].map(
        assign_flash_init,
        child_groupby=glm.parent_groups['group_parent_flash_id'])
    return new_glm

glm = GLMDataset(
    '/Users/ebruning/Downloads/OR_GLM-L2-LCFA_G16_s20200150215200_e20200150215400_c20200150215427.nc'
)

# ----- METHOD 0: use the group data to calculate and print the flash IDs and
#       the lat,lon of their first group.
t0 = time()
print('--- Method 0 ---')
flash_init_manual(glm)
print(time() - t0)

# ----- METHOD 1: as fast as above, but assigns back to the original dataset.
#       You can use get_flash_init_data if you just want the data printed above.
t0 = time()
new_glm = add_flash_init_data(glm)
# print(new_glm)
def proc_each_grid(subgrid, start_time=None, end_time=None, GLM_filenames=None):
    """ Process one tile (a subset of a larger grid) of GLM data.

    Arguments:
    subgrid -- tuple of ((xi, yi), kwargs, proc_kwargs, out_kwargs, pads) where
        (xi, yi) -- the subgrid tile index
        kwargs -- passed to GLMGridder.__init__
        proc_kwargs -- passed to GLMGridder.process_flashes
        out_kwargs -- passed to GLMGridder.write_grids
        pads -- (n_x_pad, n_y_pad, x_pad, y_pad) counts and total distances
            of padding added to this subgrid

    Keyword arguments:
    start_time -- datetime object
    end_time -- datetime object
    GLM_filenames -- a list of GLM filenames to process
    """
    subgridij, kwargsij, process_flash_kwargs_ij, out_kwargs_ij, pads = subgrid
    ellipse_rev = process_flash_kwargs_ij.pop('ellipse_rev')
    # Eventually, we want to trim off n_x/y_pad from each side of the grid
    n_x_pad, n_y_pad, x_pad, y_pad = pads
    log.info("out kwargs are %s", out_kwargs_ij)

    # These should all be independent at this point and can parallelize
    log.info(('gridder kwargs for subgrid {0} are'.format(subgridij), kwargsij))
    if 'clip_events' in process_flash_kwargs_ij:
        gridder = GLMlutGridder(start_time, end_time, **kwargsij)
    else:
        gridder = GLMGridder(start_time, end_time, **kwargsij)

    if 'clip_events' in process_flash_kwargs_ij:
        xedge, yedge = np.meshgrid(gridder.xedge, gridder.yedge)
        mesh = QuadMeshSubset(xedge, yedge, n_neighbors=16 * 10, regular=True)
        # import pickle
        # with open('/data/LCFA-production/L1b/mesh_subset.pickle', 'wb') as f:
        #     pickle.dump(mesh, f)
        process_flash_kwargs_ij['clip_events'] = mesh
        log.debug(("XEDGE", subgridij, xedge))
        log.debug(("YEDGE", subgridij, yedge))

    for filename in GLM_filenames:
        # Could create a cache of GLM objects by filename here.
        log.info("Processing {0}".format(filename))
        log.info(('process flash kwargs for {0} are'.format(subgridij),
                  process_flash_kwargs_ij))
        sys.stdout.flush()
        glm = GLMDataset(filename, ellipse_rev=ellipse_rev)
        # Pre-load the whole dataset, as recommended by the xarray docs.
        # This saves an absurd amount of time (factor of 80ish) in
        # grid.split_events.replicate_and_split_events
        glm.dataset.load()
        gridder.process_flashes(glm, **process_flash_kwargs_ij)
        glm.dataset.close()
        del glm

    preprocess_out = out_kwargs_ij.pop('preprocess_out', None)
    if preprocess_out:
        output = gridder.write_grids(**out_kwargs_ij)
        outfilenames = preprocess_out.write_all()
    else:
        outfilenames = gridder.write_grids(**out_kwargs_ij)
    return (subgridij, outfilenames)
def FolderToCSV(path, output=None):
    '''Takes a folder and converts the contents (netCDF4 GLM data) into a csv
    that can be read with CSVtoDFS. output is a filename you want to give it;
    otherwise it will generate a filename.'''
    flshs = pd.DataFrame()
    grps = pd.DataFrame()
    evnts = pd.DataFrame()
    for file in os.listdir(path):
        glm = GLMDataset(os.path.join(path, file)).dataset
        if output is None:
            output = str(glm.product_time.values).replace(':', '-').replace(
                '.', '-')
        ds = (glm.flash_time_offset_of_last_event.values -
              glm.flash_time_offset_of_first_event.values)
        flsh = pd.DataFrame([
            glm.flash_id.values, glm.flash_time_offset_of_first_event.values,
            glm.flash_lat.values, glm.flash_lon.values,
            glm.flash_energy.values, glm.flash_area.values,
            ds.astype(int)
        ]).T
        flsh.columns = [
            'FlashID', 'FlashTime', 'Lat', 'Lon', 'Energy', 'FlashArea',
            'Duration_ns'
        ]
        grp = pd.DataFrame([
            glm.group_id.values, glm.group_time_offset.values,
            glm.group_lat.values, glm.group_lon.values,
            glm.group_energy.values, glm.group_area.values
        ]).T
        grp.columns = [
            'GroupID', 'GroupTime', 'Lat', 'Lon', 'Energy', 'GroupArea'
        ]
        evnt = pd.DataFrame([
            glm.event_id.values, glm.event_time_offset.values,
            glm.event_lat.values, glm.event_lon.values,
            glm.event_energy.values
        ]).T
        evnt.columns = ['EventID', 'EventTime', 'Lat', 'Lon', 'Energy']
        # DataFrame.append was removed in pandas 2.0; concat is the supported
        # equivalent.
        flshs = pd.concat([flshs, flsh])
        grps = pd.concat([grps, grp])
        evnts = pd.concat([evnts, evnt])
    f1 = os.path.join(path, 'temp1.csv')
    f2 = os.path.join(path, 'temp2.csv')
    f3 = os.path.join(path, 'temp3.csv')
    files = [f1, f2, f3]
    evnts.to_csv(f1)
    grps.to_csv(f2)
    flshs.to_csv(f3)
    # Concatenate the three temporary CSVs into one file in the parent
    # directory of `path`.
    out_path = os.path.join(path, '..', '{}.csv'.format(output))
    with open(out_path, 'wb') as wfd:
        for f in files:
            with open(f, 'rb') as fd:
                shutil.copyfileobj(fd, wfd)
    # Prepend the event, group, and flash counts to the combined file.
    line_prepender(out_path,
                   '{},{},{}'.format(len(evnts), len(grps), len(flshs)))
    for f in files:
        os.remove(f)
    return output + '.csv'
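# Usage sketch for FolderToCSV; the folder path is a hypothetical example, and
# the folder is assumed to contain only GLM netCDF files.
csv_name = FolderToCSV('/data/GLM-L2-LCFA/')
print('Wrote', csv_name)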
""" This script accepts a list of filenames from the command line and attempts to open each of them using `glmtools`. Because `glmtools` automatically performs some flash-group-event parent-child calculations upon opening each file it is a simple way to test for valid files. For instance, it confirms that each `event_parent_group_id` has a corresponding `group_id` entry. """ import logging logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) import sys from glmtools.io.glm import GLMDataset filenames = sys.argv[1:] for filename in filenames: try: glm = GLMDataset(filename) except KeyError as e: print(filename) logger.exception(e)