Example #1
def total_energy_in_L2(files, lon_range=(-102.5, -99.5),
                       lat_range=(31, 35)):
    # Gather up all flashes over West Texas, which is all flashes in the
    # mesoscale domain centered on that location at the time of the sample files.
    energy = 0.0
    for sf in files:
        glm = GLMDataset(sf)
        flashes_subset = glm.subset_flashes(lon_range=lon_range,
                                            lat_range=lat_range)
        energy += flashes_subset.event_energy.sum().data
    return energy
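
# Minimal usage sketch; assumes GLMDataset is imported as in Example #9 and
# that matching L2 files sit in the working directory (pattern illustrative).
import glob

sample_files = sorted(glob.glob('OR_GLM-L2-LCFA_*.nc'))
print('total event energy:', total_energy_in_L2(sample_files))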
Example #2
def get_test_dataset():
    # filename ='/data/LCFA-production/OR_GLM-L2-LCFA_G16_s20171161230400_e20171161231000_c20171161231027.nc'
    # flash_ids=np.array([6359, 6472, 6666])
    path = get_sample_data_path()
    filename = os.path.join(path, 'FGE_split_merge_GLM.nc')
    flash_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    flash_ids.sort()
    glm = GLMDataset(filename)
    return glm, flash_ids
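
# Sketch of consuming the fixture; only attribute names that appear elsewhere
# in this listing are used.
glm, flash_ids = get_test_dataset()
print('expecting flashes', flash_ids, 'in file with ids',
      glm.dataset.flash_id.values)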
Example #3
def ConvToCSV(input, output=None):
    '''input is the netCDF4 file you would like to turn into a CSV.
       output is the filename you would like to give it (without .csv).
       If output is None, the product time of the netCDF4 file is used.
       Returns the name of the file.'''
    glm = GLMDataset(input)
    df = pd.DataFrame([
        glm.dataset.event_id.values, glm.dataset.event_time_offset.values,
        glm.dataset.event_lat.values, glm.dataset.event_lon.values,
        glm.dataset.event_energy.values
    ]).T
    if output is None:
        output = str(glm.dataset.product_time.values).replace(
            ':', '-').replace('.', '-')
    df.columns = ['EventID', 'EventTime', 'Lat', 'Lon', 'Energy']
    grp = pd.DataFrame([
        glm.dataset.group_id.values, glm.dataset.group_time_offset.values,
        glm.dataset.group_lat.values, glm.dataset.group_lon.values,
        glm.dataset.group_energy.values, glm.dataset.group_area.values
    ]).T
    grp.columns = ['GroupID', 'GroupTime', 'Lat', 'Lon', 'Energy', 'GroupArea']
    ds = (glm.dataset.flash_time_offset_of_last_event.values
          - glm.dataset.flash_time_offset_of_first_event.values)
    flsh = pd.DataFrame([
        glm.dataset.flash_id.values,
        glm.dataset.flash_time_offset_of_first_event.values,
        glm.dataset.flash_lat.values, glm.dataset.flash_lon.values,
        glm.dataset.flash_energy.values, glm.dataset.flash_area.values,
        ds.astype(int)
    ]).T
    flsh.columns = [
        'FlashID', 'FlashTime', 'Lat', 'Lon', 'Energy', 'FlashArea',
        'Duration_ns'
    ]
    f1 = './temp1.csv'
    f2 = './temp2.csv'
    f3 = './temp3.csv'
    files = [f1, f2, f3]
    df.to_csv(f1)
    grp.to_csv(f2)
    flsh.to_csv(f3)

    with open('{}.csv'.format(output), 'wb') as wfd:
        for f in files:
            with open(f, 'rb') as fd:
                shutil.copyfileobj(fd, wfd)

    line_prepender(
        '{}.csv'.format(output),
        '{},{},{}'.format(int(glm.dataset.event_count.values),
                          int(glm.dataset.group_count.values),
                          int(glm.dataset.flash_count.values)))
    for f in files:
        os.remove(f)
    return output + '.csv'
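
# Usage sketch (the input filename is hypothetical). The output CSV is the
# event, group, and flash tables concatenated in that order, with an
# 'event_count,group_count,flash_count' line prepended by line_prepender.
csv_name = ConvToCSV('OR_GLM-L2-LCFA_sample.nc')
print('wrote', csv_name)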
Example #4
def proc_each_grid(subgrid,
                   start_time=None,
                   end_time=None,
                   GLM_filenames=None):
    """ Process one tile (a subset of a larger grid) of GLM data.

    Arguments:
    subgrid -- tuple of (xi,yi), kwargs, proc_kwargs, out_kwargs, pads) where
        (xi, yi) -- the subgrid tile index
        kwargs -- passed to GLMGridder.__init__
        proc_kwargs -- passed to GLMGridder.process_flashes
        out_kwargs -- passed to GLMGridder.write_grids
        pads -- (n_x_pad, n_y_pad, x_pad, y_pad) counts and total distances of padding
            added to this subgrid

    Keyword arguments:
    start_time -- datetime object
    end_time -- datetime object
    GLM_filenames -- a list of GLM filenames to process
    """

    subgridij, kwargsij, process_flash_kwargs_ij, out_kwargs_ij, pads = subgrid
    ellipse_rev = process_flash_kwargs_ij.pop('ellipse_rev')

    # Eventually, we want to trim off n_x/y_pad from each side of the grid
    # n_x_pad, n_y_pad, x_pad, y_pad = pads

    log.info("out kwargs are %s", out_kwargs_ij)

    # These should all be independent at this point and can parallelize
    log.info(
        ('gridder kwargs for subgrid {0} are'.format(subgridij), kwargsij))
    if 'clip_events' in process_flash_kwargs_ij:
        gridder = GLMlutGridder(start_time, end_time, **kwargsij)
    else:
        gridder = GLMGridder(start_time, end_time, **kwargsij)

    if 'clip_events' in process_flash_kwargs_ij:
        xedge, yedge = np.meshgrid(gridder.xedge, gridder.yedge)
        mesh = QuadMeshSubset(xedge, yedge, n_neighbors=16 * 10, regular=True)
        # import pickle
        # with open('/data/LCFA-production/L1b/mesh_subset.pickle', 'wb') as f:
        # pickle.dump(mesh, f)
        process_flash_kwargs_ij['clip_events'] = mesh
        log.debug(("XEDGE", subgridij, xedge.min(), xedge.max(), xedge.shape))
        log.debug(("YEDGE", subgridij, yedge.min(), yedge.max(), yedge.shape))

    saved_first_file_metadata = False
    for filename in GLM_filenames:
        # Could create a cache of GLM objects by filename here.
        log.info("Processing {0}".format(filename))
        log.info(('process flash kwargs for {0} are'.format(subgridij),
                  process_flash_kwargs_ij))
        sys.stdout.flush()
        glm = GLMDataset(filename, ellipse_rev=ellipse_rev)
        # Pre-load the whole dataset, as recommended by the xarray docs.
        # This saves an absurd amount of time (factor of 80ish) in
        # grid.split_events.replicate_and_split_events
        if len(glm.dataset.number_of_events) > 0:
            # xarray 0.12.1 (and others?) throws an error when trying to load
            # data from an empty dimension.
            glm.dataset.load()

            if not saved_first_file_metadata:
                gridder.first_file_attrs = dict(glm.dataset.attrs)
                saved_first_file_metadata = True
            gridder.process_flashes(glm, **process_flash_kwargs_ij)
        else:
            log.info("Skipping {0} - number of events is 0".format(filename))
        glm.dataset.close()
        del glm
    log.info("Done processing all files, preparing to write")

    preprocess_out = out_kwargs_ij.pop('preprocess_out', None)
    if preprocess_out:
        if 'output_kwargs' not in out_kwargs_ij:
            out_kwargs_ij['output_kwargs'] = {}
        # Used by GLMlutGridder.write_grids, but not the others.
        out_kwargs_ij['output_kwargs']['pad'] = preprocess_out.get_pad_slices()

        output = gridder.write_grids(**out_kwargs_ij)

        # Two things can happen here. If the lmatools CF NetCDF writer is used
        # (as it would be with GLMGridder), then write_all() is the step that
        # actually does the writing, after the lmatools.FlashGridder.write_grids
        # call is intercepted by the output preprocessor. The GLMlutGridder
        # uses the pad slices kwarg, skips the preprocessor, and writes
        # directly, so write_all() does nothing in the GLMlutGridder case. It
        # would be better to resolve this inconsistency by rearchitecting how
        # the subgrids are handled, avoiding the hacky output preprocessor in
        # all cases.
        outfilenames = preprocess_out.write_all()
    else:
        outfilenames = gridder.write_grids(**out_kwargs_ij)

    return (subgridij, outfilenames)
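
# Sketch of the tuple this function expects (every name below is a
# placeholder; real values come from the code that tiles the full grid).
# proc_kwargs must contain 'ellipse_rev', since it is popped above, and the
# presence of 'clip_events' selects the GLMlutGridder path.
# subgrid = ((0, 0),                            # (xi, yi) tile index
#            gridder_kwargs,                    # -> GLMGridder.__init__
#            proc_kwargs,                       # -> GLMGridder.process_flashes
#            out_kwargs,                        # -> GLMGridder.write_grids
#            (n_x_pad, n_y_pad, x_pad, y_pad))  # padding counts and distances
# proc_each_grid(subgrid, start_time=t0, end_time=t1, GLM_filenames=files)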
Example #5
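# This fragment begins mid-function. The head below is a reconstruction, an
# assumption inferred from the variables used later and from the plot_flash
# import in the __main__ block; the original signature and setup may differ.
import matplotlib.pyplot as plt


def plot_flash(glm, flash_id):
    # Assumed: GLMDataset.get_flashes returns the flash together with its
    # child groups and events as one dataset.
    flash_data = glm.get_flashes([flash_id])
    fl_id = flash_data.flash_id.data
    ev_lats = flash_data.event_lat.data
    ev_lons = flash_data.event_lon.data
    ev_rad = flash_data.event_energy.data
    gr_lat = flash_data.group_lat.data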
    gr_lon = flash_data.group_lon.data
    gr_rad = flash_data.group_energy.data
    fl_lat = flash_data.flash_lat.data
    fl_lon = flash_data.flash_lon.data
    fl_rad = flash_data.flash_energy.data
    fl_time = (flash_data.flash_time_offset_of_first_event.data[0], 
               flash_data.flash_time_offset_of_last_event.data[0])

    fig = plt.figure()
    ax_ev = fig.add_subplot(111)
    ax_ev.scatter(gr_lon, gr_lat, c=gr_rad, marker='o', s=100, 
                  edgecolor='black', cmap='gray_r') 
    #, vmin=glm.energy_min, vmax=glm.energy_max)
    ax_ev.scatter(ev_lons, ev_lats, c=ev_rad, marker='s', s=16, 
                  edgecolor='black', cmap='gray') 
    #, vmin=glm.energy_min, vmax=glm.energy_max)
    ax_ev.scatter(fl_lon, fl_lat, c='r', marker='x', s=100)
    ax_ev.set_title('GLM Flash #{0}\nfrom {1}\nto {2}'.format(fl_id[0], fl_time[0], fl_time[1]))

    # prevent scientific notation
    ax_ev.get_xaxis().get_major_formatter().set_useOffset(False)
    ax_ev.get_yaxis().get_major_formatter().set_useOffset(False)

    return fig


if __name__ == '__main__':
    from glmtools.plot.locations import plot_flash    
    from glmtools.io.glm import GLMDataset
    glm = GLMDataset('/data/LCFA-production/OR_GLM-L2-LCFA_G16_s20171161230400_e20171161231000_c20171161231027.nc')
    plot_flash(glm, 6666)
Example #6
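# This fragment begins mid-function. The imports and loop head below are a
# reconstruction, an assumption inferred from the continuation line that
# follows and from the names used in the script body; flash_init_manual and
# add_flash_init_data are defined elsewhere in the original file.
from time import time

from glmtools.io.glm import GLMDataset


def assign_flash_init(glmflidxd):
    # Assumed variable name flash_init_id: compare each stored flash-init id
    # against the recomputed test value.
    for v in zip(glmflidxd.flash_init_id.data,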
                 glmflidxd.flash_init_id_test.data):
        assert (int(v[1] - v[0]) == 0)
    new_glm = glmflidxd.reset_index('number_of_flashes').rename(
        {'number_of_flashes_': 'flash_id'})
    return new_glm


def calculate_flash_init(glm):
    new_glm = glm.entity_groups['flash_id'].map(
        assign_flash_init,
        child_groupby=glm.parent_groups['group_parent_flash_id'])
    return new_glm


glm = GLMDataset(
    '/Users/ebruning/Downloads/OR_GLM-L2-LCFA_G16_s20200150215200_e20200150215400_c20200150215427.nc'
)

# ----- METHOD 0: use the group data to calculate and print the flash IDs and
#                 the lat,lon of their first group.
t0 = time()
print('--- Method 0 ---')
flash_init_manual(glm)
print(time() - t0)

# ----- METHOD 1: as fast as above, but assigns back to the original dataset.
#                 You can use get_flash_init_data if you just want the data
#                 printed above.
t0 = time()
new_glm = add_flash_init_data(glm)
# print(new_glm)
Example #7
def proc_each_grid(subgrid, start_time=None, end_time=None, GLM_filenames=None):
    """ Process one tile (a subset of a larger grid) of GLM data.

    Arguments:
    subgrid -- tuple of (xi,yi), kwargs, proc_kwargs, out_kwargs, pads) where
        (xi, yi) -- the subgrid tile index
        kwargs -- passed to GLMGridder.__init__
        proc_kwargs -- passed to GLMGridder.process_flashes
        out_kwargs -- passed to GLMGridder.write_grids
        pads -- (n_x_pad, n_y_pad, x_pad, y_pad) counts and total distances of padding
            added to this subgrid

    Keyword arguments:
    start_time -- datetime object
    end_time -- datetime object
    GLM_filenames -- a list of GLM filenames to process
    """

    subgridij, kwargsij, process_flash_kwargs_ij, out_kwargs_ij, pads = subgrid 
    ellipse_rev = process_flash_kwargs_ij.pop('ellipse_rev')

    # Eventually, we want to trim off n_x/y_pad from each side of the grid
    n_x_pad, n_y_pad, x_pad, y_pad = pads

    log.info("out kwargs are %s", out_kwargs_ij)

    # These should all be independent at this point and can parallelize
    log.info(('gridder kwargs for subgrid {0} are'.format(subgridij), kwargsij))
    if 'clip_events' in process_flash_kwargs_ij:
        gridder = GLMlutGridder(start_time, end_time, **kwargsij)
    else:
        gridder = GLMGridder(start_time, end_time, **kwargsij)

    if 'clip_events' in process_flash_kwargs_ij:
        xedge, yedge = np.meshgrid(gridder.xedge, gridder.yedge)
        mesh = QuadMeshSubset(xedge, yedge, n_neighbors=16 * 10, regular=True)
        # import pickle
        # with open('/data/LCFA-production/L1b/mesh_subset.pickle', 'wb') as f:
            # pickle.dump(mesh, f)
        process_flash_kwargs_ij['clip_events'] = mesh
        log.debug(("XEDGE", subgridij, xedge))
        log.debug(("YEDGE", subgridij, yedge))
    for filename in GLM_filenames:
        # Could create a cache of GLM objects by filename here.
        log.info("Processing {0}".format(filename))
        log.info(('process flash kwargs for {0} are'.format(subgridij),
            process_flash_kwargs_ij))
        sys.stdout.flush()
        glm = GLMDataset(filename, ellipse_rev=ellipse_rev)
        # Pre-load the whole dataset, as recommended by the xarray docs.
        # This saves an absurd amount of time (factor of 80ish) in
        # grid.split_events.replicate_and_split_events
        glm.dataset.load()
        gridder.process_flashes(glm, **process_flash_kwargs_ij)
        glm.dataset.close()
        del glm

    preprocess_out = out_kwargs_ij.pop('preprocess_out', None)
    if preprocess_out:
        output = gridder.write_grids(**out_kwargs_ij)
        outfilenames = preprocess_out.write_all()
    else:
        outfilenames = gridder.write_grids(**out_kwargs_ij)

    return (subgridij, outfilenames)
Example #8
def FolderToCSV(path, output=None):
    '''Takes a folder and converts its contents (netCDF4 GLM data) into a CSV
       that can be read with CSVtoDFS. output is a filename you would like to
       give it; otherwise one is generated from the product time.'''
    flshs = pd.DataFrame()
    grps = pd.DataFrame()
    evnts = pd.DataFrame()
    for file in os.listdir(path):
        glm = GLMDataset('{}/{}'.format(path, file)).dataset

        if output is None:
            output = str(glm.product_time.values).replace(':', '-').replace(
                '.', '-')

        ds = (glm.flash_time_offset_of_last_event.values
              - glm.flash_time_offset_of_first_event.values)
        flsh = pd.DataFrame([
            glm.flash_id.values, glm.flash_time_offset_of_first_event.values,
            glm.flash_lat.values, glm.flash_lon.values,
            glm.flash_energy.values, glm.flash_area.values,
            ds.astype(int)
        ]).T
        flsh.columns = [
            'FlashID', 'FlashTime', 'Lat', 'Lon', 'Energy', 'FlashArea',
            'Duration_ns'
        ]

        grp = pd.DataFrame([
            glm.group_id.values, glm.group_time_offset.values,
            glm.group_lat.values, glm.group_lon.values,
            glm.group_energy.values, glm.group_area.values
        ]).T
        grp.columns = [
            'GroupID', 'GroupTime', 'Lat', 'Lon', 'Energy', 'GroupArea'
        ]

        evnt = pd.DataFrame([
            glm.event_id.values, glm.event_time_offset.values,
            glm.event_lat.values, glm.event_lon.values, glm.event_energy.values
        ]).T
        evnt.columns = ['EventID', 'EventTime', 'Lat', 'Lon', 'Energy']

        # Accumulate the per-file tables (pd.concat, since DataFrame.append
        # was removed in pandas 2.0).
        flshs = pd.concat([flshs, flsh])
        grps = pd.concat([grps, grp])
        evnts = pd.concat([evnts, evnt])

    f1 = path + 'temp1.csv'
    f2 = path + 'temp2.csv'
    f3 = path + 'temp3.csv'
    files = [f1, f2, f3]
    evnts.to_csv(f1)
    grps.to_csv(f2)
    flshs.to_csv(f3)

    with open(path + '../{}.csv'.format(output), 'wb') as wfd:
        for f in files:
            with open(f, 'rb') as fd:
                shutil.copyfileobj(fd, wfd)

    line_prepender(
        path + '../{}.csv'.format(output),
        '{},{},{}'.format(int(len(evnts)), int(len(grps)), int(len(flshs))))
    for f in files:
        os.remove(f)
    return output + '.csv'
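
# Usage sketch (the directory name is hypothetical). path must end with a
# separator, since the temp filenames are built by simple concatenation, and
# the combined CSV lands one directory above path.
csv_name = FolderToCSV('glm_data/')
print('wrote', csv_name)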
Example #9
""" This script accepts a list of filenames from the command line and attempts
to open each of them using `glmtools`. Because `glmtools` automatically performs
some flash-group-event parent-child calculations upon opening each file, it is
a simple way to test for valid files. For instance, it confirms that each
`event_parent_group_id` has a corresponding `group_id` entry.
"""

import logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

import sys
from glmtools.io.glm import GLMDataset

filenames = sys.argv[1:]
for filename in filenames:
    try:
        glm = GLMDataset(filename)
    except KeyError as e:
        print(filename)
        logger.exception(e)
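
# Typical invocation from a shell (the script name and glob pattern are
# illustrative):
#   python check_glm_files.py OR_GLM-L2-LCFA_*.nc
# Only filenames that fail to open with a KeyError are printed, with the
# traceback logged via logger.exception.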