Example #1
def __test__():
    import numpy as np
    import matplotlib.pyplot as plt
    from PointDatabase.point_data import point_data
    from PointDatabase.resample_path import resample_path
    x0 = np.arange(0, 13, .2)
    y0 = 0.01 * (x0 * 2)**2
    x0, y0 = resample_path(x0, y0, 0.1)
    x1 = np.arange(0.5, 4.95, .1)
    y1 = -0.25 * (x1**2) + x1 + 2
    x1, y1 = resample_path(x1, y1, 0.1)

    plt.figure()
    plt.plot(x0, y0)
    plt.plot(x1, y1)
    plt.axis('equal')
    D = [
        point_data().from_dict({
            'x': x0,
            'y': y0,
            'time': np.arange(len(x0))
        }),
        point_data().from_dict({
            'x': x1,
            'y': y1,
            'time': np.arange(len(x1))
        })
    ]

    xyC, inds, L = cross_tracks(D, delta=0.1, delta_coarse=0.5)
Example #2
def read_tile(xy0, tile_dir, W=None):
    tile_file = tile_dir + ('/E%d_N%d.h5' % (xy0[0] / 1.e3, xy0[1] / 1.e3))

    D = dict()
    if W is not None:
        # parse the tile center back out of the file name (E<x_km>_N<y_km>.h5)
        g = re.compile('E(.*)_N(.*).h5').search(os.path.basename(tile_file))
        x0 = float(g.group(1)) * 1000
        y0 = float(g.group(2)) * 1000
    else:
        x0 = None
        y0 = None
    with h5py.File(tile_file, 'r') as h5f:
        # figure out what fields to read:
        # find a group in h5f that contains fields:
        bin_re = re.compile('.*E_.*N')
        for group in h5f:
            if bin_re.match(group) is not None:
                list_of_fields = [field for field in h5f[group]]
                if len(list_of_fields) > 0:
                    break
        # next read all the data
        D = {field: [] for field in list_of_fields}
        for group in h5f:
            if bin_re.match(group) is not None:
                for field in list_of_fields:
                    D[field].append(np.array(h5f[group][field]))
        # concatenate the list of arrays in D:
        for field in D:
            D[field] = np.concatenate(D[field])
        # make D into a point_data instance:
        D = point_data(list_of_fields=[key for key in D.keys()]).from_dict(D)
    return reconstruct_tracks(D, x0=x0, y0=y0, W=W)
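The tile file name packs the tile center into the name itself; a minimal standalone sketch of that round trip, assuming the E<x_km>_N<y_km>.h5 convention built above (the coordinates are made up):

import re

# hypothetical tile name following the E<x_km>_N<y_km>.h5 convention
tile_name = 'E-40_N-560.h5'
g = re.compile(r'E(.*)_N(.*).h5').search(tile_name)
x0 = float(g.group(1)) * 1000  # -40000.0 m
y0 = float(g.group(2)) * 1000  # -560000.0 m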
Example #3
def read_xovers(xover_dir, verbose=False, wildcard='*'):
    tiles = glob.glob(xover_dir + '/*.h5')
    with h5py.File(tiles[0], 'r') as h5f:
        fields = [key for key in h5f['data_0'].keys()]

    D = []
    meta = {'slope_x': [], 'slope_y': [], 'grounded': []}
    #X=[]
    for tile in glob.glob(xover_dir + '/' + wildcard + '.h5'):
        try:
            with h5py.File(tile, 'r') as h5f:
                for field in ['slope_x', 'slope_y', 'grounded']:
                    meta[field].append(np.array(h5f['/' + field]))
        except KeyError:
            if verbose:
                print("failed to read " + tile)
            continue
        D.append([
            point_data(list_of_fields=fields).from_file(
                tile, field_dict={gr: fields}) for gr in ['data_0', 'data_1']
        ])

    for field in meta.keys():
        meta[field] = np.concatenate(meta[field])
    v = {}
    for field in fields:
        vi = []
        for Di in D:
            vi.append(np.r_[[
                np.sum(getattr(Di[ii], field) * Di[ii].W, axis=0)
                for ii in [0, 1]
            ]])
        v[field] = np.concatenate(vi, axis=1).T
    delta = {field: np.diff(v[field], axis=1) for field in fields}
    bar = {field: np.mean(v[field], axis=1) for field in fields}
    return v, delta, bar, meta
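A toy sketch of the final differencing step (values invented): each row of v[field] holds the interpolated value on the two crossing tracks, so np.diff along axis 1 gives the crossover difference and np.mean the crossover mean.

import numpy as np

# two hypothetical crossovers; columns are [value on track 0, value on track 1]
v_h = np.array([[100.0, 101.5],
                [250.0, 249.2]])
delta_h = np.diff(v_h, axis=1)  # per-crossover difference, shape (2, 1)
bar_h = np.mean(v_h, axis=1)    # per-crossover mean, shape (2,)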
Example #4
def read_indexed_h5_file(filename,
                         xy_bin,
                         fields=['x', 'y', 'time'],
                         index_range=[[-1], [-1]]):
    out_data = {field: list() for field in fields}
    h5f = h5py.File(filename, 'r')
    blank_fields = list()
    if isinstance(xy_bin, np.ndarray):
        xy_bin = [xy_bin[:, 0], xy_bin[:, 1]]

    if index_range[0][0] >= 0:
        # All the geo bins are together.  Use the index_range variable to read
        for field in fields:
            if field not in h5f:
                blank_fields.append(field)
                continue
            # make sure the index range gets iterated over properly.
            if len(index_range[0]) < 2:
                if len(h5f[field].shape) > 1:
                    out_data[field].append(
                        np.array(h5f[field][
                            0, int(index_range[0]):int(index_range[1])]))
                else:
                    out_data[field].append(
                        np.array(h5f[field]
                                 [int(index_range[0]):int(index_range[1])]))
            else:
                for i0_i1 in zip(index_range[0], index_range[1]):
                    if len(h5f[field].shape) > 1:
                        out_data[field].append(
                            np.array(h5f[field][0, i0_i1[0]:i0_i1[1]]))
                    else:
                        out_data[field].append(
                            np.array(h5f[field][i0_i1[0]:i0_i1[1]]))
    else:
        # this is a file with distinct bins, each with its own set of datasets
        for xy in zip(xy_bin[0], xy_bin[1]):
            bin_name = '%dE_%dN' % xy
            for field in fields:
                if field in h5f:
                    if bin_name in h5f[field]:
                        out_data[field].append(
                            np.array(h5f[field][bin_name]).squeeze())
                elif bin_name in h5f:
                    if field in h5f[bin_name]:
                        out_data[field].append(
                            np.array(h5f[bin_name][field]).squeeze())
                else:
                    blank_fields.append(field)
    h5f.close()
    for field in fields:
        if isinstance(out_data[field], list):
            if len(out_data[field]) > 1:
                try:
                    temp = list()
                    for item in out_data[field]:
                        if item.size > 0 and item.ndim > 0:
                            temp.append(item)
                        elif item.size == 1 and item.ndim == 0:
                            temp.append(np.array([item]))
                    if len(temp) > 1:
                        out_data[field] = np.concatenate(temp)
                    elif len(temp) == 0:
                        out_data[field] = np.zeros(0)
                    elif len(temp) == 1:
                        out_data[field] = temp[0]
                except ValueError as e:
                    print("ValueError in read_indexed_h5_file, continuing")
                    print(e)
            else:
                out_data[field] = np.array(out_data[field])
    return point_data(list_of_fields=fields).from_dict(out_data)
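The per-bin groups this function looks up are named with the '%dE_%dN' pattern; a small sketch of how the xy_bin pairs map onto group names (the coordinates are hypothetical):

import numpy as np

xy_bin = [np.array([-40000., -30000.]), np.array([-560000., -550000.])]
for xy in zip(xy_bin[0], xy_bin[1]):
    print('%dE_%dN' % xy)  # e.g. '-40000E_-560000N'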
Example #5
def get_data_for_geo_index(query_results,
                           delta=[10000., 10000.],
                           fields=None,
                           data=None,
                           group='index',
                           dir_root=''):
    # read the data from a set of query results
    # Currently the function knows how to read:
    # h5_geoindex
    # indexed h5s
    # Qfit data (waveform and plain)
    # DEM data (filtered and not)
    # ATL06 data.
    # Append more cases as needed
    if len(dir_root) > 0:
        dir_root += '/'
    out_data = list()

    # some data types take a dictionary rather than a list of fields
    if isinstance(fields, dict):
        field_dict = fields
        field_list = None
    else:
        field_dict = None
        field_list = fields

    # if we are querying any DEM data, work out the bounds of the query so we don't have to read the whole DEMs
    all_types = [query_results[key]['type'] for key in query_results]
    if 'DEM' in all_types or 'filtered_DEM' in all_types:
        all_x = list()
        all_y = list()
        for key, result in query_results.items():
            all_x += result['x'].tolist()
            all_y += result['y'].tolist()
        bounds = [[np.min(all_x) - delta[0] / 2,
                   np.max(all_x) + delta[0] / 2],
                  [np.min(all_y) - delta[1] / 2,
                   np.max(all_y) + delta[1] / 2]]

    for file_key, result in query_results.items():
        if dir_root is not None:
            try:
                this_file = dir_root + file_key
            except TypeError:
                this_file = dir_root + file_key.decode()
        else:
            this_file = file_key
        if result['type'] == 'h5_geoindex':
            D = geo_index().from_file(this_file).query_xy(
                (result['x'], result['y']),
                fields=fields,
                get_data=True,
                dir_root=dir_root)
        if result['type'] == 'ATL06':
            if fields is None:
                fields = {
                    None: (u'latitude', u'longitude', u'h_li', u'delta_time')
                }
            D6_file, pair = this_file.split(':pair')
            D=[ATL06_data(beam_pair=int(pair), list_of_fields=field_list, field_dict=field_dict).from_file(\
                filename=D6_file, index_range=np.array(temp)) \
                for temp in zip(result['offset_start'], result['offset_end'])]
        if result['type'] == 'ATL11':
            D11_file, pair = this_file.split(':pair')
            D=[ATL11.data(beam_pair=int(pair), list_of_fields=field_list, field_dict=field_dict).from_file(\
                filename=D11_file, index_range=np.array(temp)) \
                for temp in zip(result['offset_start'], result['offset_end'])]
        if result['type'] == 'ATM_Qfit':
            D = [
                Qfit_data(filename=this_file, index_range=np.array(temp))
                for temp in zip(result['offset_start'], result['offset_end'])
            ]
        if result['type'] == 'ATM_waveform':
            D = [
                Qfit_data(filename=this_file,
                          index_range=np.array(temp),
                          waveform_format=True)
                for temp in zip(result['offset_start'], result['offset_end'])
            ]
        if result['type'] == 'DEM':
            D = dict()
            D['x'], D['y'], D['z'] = read_DEM(filename=this_file,
                                              asPoints=True,
                                              bounds=bounds)
            D['time'] = np.zeros_like(D['x']) + WV_MatlabDate(this_file)
            D = point_data().from_dict(D)
            D.index(np.isfinite(D.z))
        if result['type'] == 'filtered_DEM':
            D = dict()
            D['x'], D['y'], D['z'] = read_DEM(this_file,
                                              asPoints=True,
                                              band_num=1,
                                              keepAll=True,
                                              bounds=bounds)
            D['x'], D['y'], D['sigma'] = read_DEM(this_file,
                                                  asPoints=True,
                                                  band_num=2,
                                                  keepAll=True,
                                                  bounds=bounds)
            D['time'] = np.zeros_like(D['x']) + WV_MatlabDate(this_file)
            D = point_data().from_dict(D)
            D.index(np.isfinite(D.z) & np.isfinite(D.sigma))
            D.filename = this_file
        if result['type'] == 'indexed_h5':
            D = [
                read_indexed_h5_file(
                    this_file, [result['x'], result['y']],
                    fields=fields,
                    index_range=[result['offset_start'], result['offset_end']])
            ]
        if result['type'] == 'indexed_h5_from_matlab':
            D = [
                read_indexed_h5_file(
                    this_file, [result['x'] / 1000, result['y'] / 1000],
                    fields=fields,
                    index_range=[result['offset_start'], result['offset_end']])
            ]
        if result['type'] is None:
            D = [
                data.subset(np.arange(temp[0], temp[1]))
                for temp in zip(result['offset_start'], result['offset_end'])
            ]
        # add data to list of results.  May be a list or a single result
        if isinstance(D, list):
            for Di in D:
                if Di.filename is None:
                    Di.filename = this_file
            out_data += D
        else:
            if D.filename is None:
                D.filename = this_file
            out_data.append(D)
    return out_data
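The fields argument is passed through either as a flat list (list_of_fields) or as a group-keyed dictionary (field_dict); both forms below are illustrative, using the ATL06-style names already mentioned above:

# flat list of field names
fields_as_list = ['latitude', 'longitude', 'h_li', 'delta_time']
# dictionary keyed by group
fields_as_dict = {None: ('latitude', 'longitude', 'h_li', 'delta_time')}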
Example #6
def make_tile(args):

    xy0 = args['xy0']
    SRS_proj4 = args['SRS_proj4']
    tile_spacing = args['tile_spacing']
    bin_W = args['bin_W']
    GI_file = args['GI_file']
    out_dir = args['out_dir']
    field_dict = args['field_dict']
    seg_diff_scale = args['seg_diff_scale']
    blockmedian_scale = args['blockmedian_scale']
    dxb, dyb = np.meshgrid(
        np.arange(-tile_spacing / 2, tile_spacing / 2 + bin_W, bin_W),
        np.arange(-tile_spacing / 2, tile_spacing / 2 + bin_W, bin_W))
    dxb = dxb.ravel()
    dyb = dyb.ravel()

    list_of_fields = []
    for group in field_dict:
        for ds in field_dict[group]:
            list_of_fields.append(ds)

    gI = geo_index().from_file(GI_file, read_file=False)
    out_file = out_dir + ('/E%d_N%d.h5' % (xy0[0] / 1.e3, xy0[1] / 1.e3))
    if os.path.isfile(out_file):
        os.remove(out_file)

    D = gI.query_xy((xy0[0] + dxb, xy0[1] + dyb), fields=field_dict)
    if D is None:
        return
    file_dict = {}
    delete_list = []
    for file_num, Di in enumerate(D):
        Di.get_xy(SRS_proj4)
        Di.assign(
            {'source_file_num': np.zeros_like(Di.x, dtype=int) + file_num})
        if seg_diff_scale is not None:
            Di.h_li[Di.atl06_quality_summary == 1] = np.NaN
            segDifferenceFilter(Di, setValid=False, toNaN=True)
        Di.ravel_fields()
        if blockmedian_scale is not None:
            Di.index(np.isfinite(Di.h_li) & (Di.atl06_quality_summary == 0))
            try:
                ind = pt_blockmedian(Di.x,
                                     Di.y,
                                     Di.h_li,
                                     blockmedian_scale,
                                     return_index=True)[3]
            except Exception:
                delete_list.append(Di)
                continue
            Di.index(ind[:, 0])
        else:
            Di.index(np.isfinite(Di.h_li))
        file_dict[file_num] = Di.filename
    D_all = point_data(list_of_fields=list_of_fields +
                       ['x', 'y', 'source_file_num']).from_list(D)

    y_bin_function = np.round(D_all.y / bin_W)
    x_bin_function = np.round(D_all.x / bin_W)
    x_scale = np.nanmax(x_bin_function) - np.nanmin(x_bin_function)
    t_scale = np.nanmax(D_all.delta_time) - np.nanmin(D_all.delta_time)

    xy_bin_function = (y_bin_function -
                       np.nanmin(y_bin_function)) * x_scale + (
                           x_bin_function - np.nanmin(x_bin_function))
    xyt_bin_function = xy_bin_function + (
        D_all.delta_time - np.nanmin(D_all.delta_time)) / t_scale
    ind = np.argsort(xyt_bin_function)

    bin_dict = {}
    xy_bin_fn_sort = xy_bin_function[ind]
    fn_delta = np.concatenate([[-1],
                               np.where(np.diff(xy_bin_fn_sort))[0],
                               [xy_bin_fn_sort.size]])
    for ii in range(len(fn_delta) - 1):
        this_ind = ind[(fn_delta[ii] + 1):(fn_delta[ii + 1] + 1)]
        bin_dict[(x_bin_function[this_ind[0]],
                  y_bin_function[this_ind[0]])] = this_ind
    key_arr = np.array([key for key in bin_dict.keys()])
    key_order = np.argsort((key_arr[:, 1] - np.min(key_arr[:, 1])) * x_scale +
                           (key_arr[:, 0] - np.min(key_arr[:, 0])))
    key_arr = key_arr[key_order, :]
    for key in key_arr:
        this_group = '%dE_%dN' % tuple(key * bin_W)
        D_all.subset(bin_dict[tuple(key)]).to_file(out_file,
                                                   replace=False,
                                                   group=this_group)

    with h5py.File(out_file, 'r+') as h5f:
        grp = h5f.create_group("source_files")
        for key in file_dict:
            grp.attrs['file_%d' % key] = file_dict[key]
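A toy sketch (made-up coordinates) of the bin-sorting key used above: points are keyed by their rounded x and y bin indices, and the combined key makes each bin's points contiguous after the argsort.

import numpy as np

bin_W = 100.
x = np.array([40., 260., 90., 270.])
y = np.array([10., 220., 30., 240.])
x_bin = np.round(x / bin_W)  # [0., 3., 1., 3.]
y_bin = np.round(y / bin_W)  # [0., 2., 0., 2.]
x_scale = np.nanmax(x_bin) - np.nanmin(x_bin)
xy_key = (y_bin - np.nanmin(y_bin)) * x_scale + (x_bin - np.nanmin(x_bin))
order = np.argsort(xy_key)   # points in the same bin end up adjacent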
Example #7
def get_xover_data(x0, y0, rgt, GI_files, xover_cache, index_bin_size,
                   params_11):
    """
    Read the data from other tracks.

    Maintain a cache of data so that subsequent reads don't have to reload data from disk
    Inputs:
        x0, y0: bin centers
        rgt: current rgt
        GI_files: list of geographic index files
        xover_cache: data cache (dict)
        index_bin_size: size of the bins in the index
        params_11: default parameter values for the ATL11 fit

    """

    # identify the crossover centers
    x0_ctrs = buffered_bins(x0, y0, 2 * params_11.L_search_XT, index_bin_size)
    D_xover = []

    for x0_ctr in x0_ctrs:
        this_key = (np.real(x0_ctr), np.imag(x0_ctr))
        # check if we have already read in the data for this bin
        if this_key not in xover_cache:
            # if we haven't already read in the data, read it in.  These data will be in xover_cache[this_key]
            temp = []
            for GI_file in GI_files:
                new_data = geo_index().from_file(GI_file).query_xy(
                    this_key, fields=params_11.ATL06_xover_field_list)
                if new_data is not None:
                    temp += new_data
            if len(temp) == 0:
                xover_cache[this_key] = None
                continue
            xover_cache[this_key] = {
                'D':
                point_data(list_of_fields=params_11.ATL06_xover_field_list).
                from_list(temp)
            }
            # remove the current rgt from data in the cache
            xover_cache[this_key]['D'].index(
                ~np.in1d(xover_cache[this_key]['D'].rgt, [rgt]))
            if xover_cache[this_key]['D'].size == 0:
                continue
            xover_cache[this_key]['D'].get_xy(EPSG=params_11.EPSG)
            # index the cache at 100-m resolution
            xover_cache[this_key]['index'] = geo_index(
                delta=[100, 100], data=xover_cache[this_key]['D'])
        # now read the data from the crossover cache
        if (xover_cache[this_key] is not None) and (xover_cache[this_key]['D']
                                                    is not None):
            try:
                Q = xover_cache[this_key]['index'].query_xy([x0, y0],
                                                            pad=1,
                                                            get_data=False)
            except KeyError:
                Q = None
            if Q is None:
                continue
            # if we have read in any data for the current bin, subset it to the bins around the reference point
            for key in Q:
                for i0, i1 in zip(Q[key]['offset_start'],
                                  Q[key]['offset_end']):
                    D_xover.append(xover_cache[this_key]['D'].subset(
                        np.arange(i0, i1 + 1, dtype=int)))
    if len(D_xover) > 0:
        D_xover = point_data().from_list(D_xover)

    # cleanup the cache if it is too large
    if len(xover_cache.keys()) > 50:
        cleanup_xover_cache(xover_cache, x0, y0, 2e4)

    return D_xover
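The cache keys above come from what appear to be complex-valued bin centers (x + 1j*y) returned by buffered_bins; a minimal sketch of that keying, with a hypothetical center:

import numpy as np

x0_ctr = -40000. + 1j * -560000.                # hypothetical bin center, x + 1j*y
this_key = (np.real(x0_ctr), np.imag(x0_ctr))   # -> (-40000.0, -560000.0)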