Example #1
0
def read_ATL06_data(ATL06_files, beam_pair=2, cycles=(1, 12)):
    '''
    Read ATL06 data from a list of files for a specific beam pair.

    required arguments:
        ATL06_files: a list of ATL06 files
        beam_pair: pair number to read from the files
        cycles: first and last cycles to include (inclusive)

    returns:
        an ATL06_data object with rows sorted by cycle number and x/y set
        to polar-stereographic coordinates for the appropriate hemisphere,
        or None if no data were read.
    '''
    params_11 = ATL11.defaults()
    # read in the ATL06 data from all the repeats
    D6_list = []
    # the cycle number is the first two captured digits of the 8-digit
    # orbit/cycle/segment field in the standard ATL06 filename; raw string
    # avoids invalid-escape warnings for \d on modern Python
    ATL06_re = re.compile(r'ATL06_\d+_\d\d\d\d(\d\d)\d\d_')
    for filename in ATL06_files:
        # if the filename matches the standard pattern, skip cycles outside
        # the requested range; non-matching filenames are read regardless
        # (same behavior as the original broad try/except, made explicit)
        m = ATL06_re.search(filename)
        if m is not None:
            cycle = int(m.group(1))
            if cycle < cycles[0] or cycle > cycles[1]:
                continue
        try:
            D6_list.append(
                ATL06_data(field_dict=params_11.ATL06_field_dict,
                           beam_pair=beam_pair).from_file(filename))
        except KeyError:
            # the file does not contain the requested beam pair
            pass
    if len(D6_list) == 0:
        return None

    D6 = ATL06_data(beam_pair=beam_pair).from_list(D6_list)
    if D6.size == 0:
        return None
    # reorder data rows from D6 by cycle
    D6.index(np.argsort(D6.cycle_number[:, 0], axis=0))

    # choose the hemisphere and project the data to polar stereographic:
    # all-southern latitudes -> Antarctic (EPSG 3031), else Arctic (EPSG 3413)
    if np.max(D6.latitude) < 0:
        D6.get_xy(None, EPSG=3031)
    else:
        D6.get_xy(None, EPSG=3413)

    return D6
Example #2
0
def get_data_for_geo_index(query_results,
                           delta=(10000., 10000.),
                           fields=None,
                           data=None,
                           group='index',
                           dir_root=''):
    '''
    Read the data for a set of geo_index query results.

    Currently the function knows how to read:
        h5_geoindex
        indexed h5s (native and matlab-style)
        Qfit data (waveform and plain)
        DEM data (filtered and not)
        ATL06 and ATL11 data
    Append more cases as needed.

    arguments:
        query_results: dict of geo_index query results, keyed by file
        delta: [dx, dy] bin size, used to pad the bounds for DEM reads
        fields: list or dict of fields to read
        data: pre-read data object, subset when result['type'] is None
        group: kept for interface compatibility (not used here)
        dir_root: directory prepended to each file key (may be None or '')

    returns: list of data objects
    '''
    # BUG FIX: len(dir_root) raised TypeError when dir_root is None, which
    # the loop below explicitly anticipates; truthiness handles both cases
    if dir_root:
        dir_root += '/'
    out_data = list()

    # some data types take a dictionary rather than a list of fields
    if isinstance(fields, dict):
        field_dict = fields
        field_list = None
    else:
        field_dict = None
        field_list = fields

    # if we are querying any DEM data, work out the bounds of the query up
    # front so we don't have to read the whole DEMs
    all_types = [query_results[key]['type'] for key in query_results]
    if 'DEM' in all_types or 'filtered_DEM' in all_types:
        all_x = list()
        all_y = list()
        for key, result in query_results.items():
            all_x += result['x'].tolist()
            all_y += result['y'].tolist()
        bounds = [[np.min(all_x) - delta[0] / 2,
                   np.max(all_x) + delta[0] / 2],
                  [np.min(all_y) - delta[1] / 2,
                   np.max(all_y) + delta[1] / 2]]

    for file_key, result in query_results.items():
        if dir_root is not None:
            try:
                this_file = dir_root + file_key
            except TypeError:
                # file keys read back from h5 files may be bytes
                this_file = dir_root + file_key.decode()
        else:
            this_file = file_key
        # exactly one reader applies per result; elif prevents a stale D
        # from a previous iteration being re-appended for an unknown type
        if result['type'] == 'h5_geoindex':
            D = geo_index().from_file(this_file).query_xy(
                (result['x'], result['y']),
                fields=fields,
                get_data=True,
                dir_root=dir_root)
        elif result['type'] == 'ATL06':
            if fields is None:
                fields = {
                    None: (u'latitude', u'longitude', u'h_li', u'delta_time')
                }
                # BUG FIX: the default must also reach the ATL06 reader,
                # which takes field_dict rather than fields
                field_dict = fields
            D6_file, pair = this_file.split(':pair')
            D = [ATL06_data(beam_pair=int(pair),
                            list_of_fields=field_list,
                            field_dict=field_dict).from_file(
                                filename=D6_file, index_range=np.array(temp))
                 for temp in zip(result['offset_start'], result['offset_end'])]
        elif result['type'] == 'ATL11':
            D11_file, pair = this_file.split(':pair')
            D = [ATL11.data(beam_pair=int(pair),
                            list_of_fields=field_list,
                            field_dict=field_dict).from_file(
                                filename=D11_file, index_range=np.array(temp))
                 for temp in zip(result['offset_start'], result['offset_end'])]
        elif result['type'] == 'ATM_Qfit':
            D = [
                Qfit_data(filename=this_file, index_range=np.array(temp))
                for temp in zip(result['offset_start'], result['offset_end'])
            ]
        elif result['type'] == 'ATM_waveform':
            D = [
                Qfit_data(filename=this_file,
                          index_range=np.array(temp),
                          waveform_format=True)
                for temp in zip(result['offset_start'], result['offset_end'])
            ]
        elif result['type'] == 'DEM':
            D = dict()
            D['x'], D['y'], D['z'] = read_DEM(filename=this_file,
                                              asPoints=True,
                                              bounds=bounds)
            D['time'] = np.zeros_like(D['x']) + WV_MatlabDate(this_file)
            D = point_data().from_dict(D)
            # BUG FIX: index takes only the selection; the original passed
            # D as an extra argument (compare the filtered_DEM branch)
            D.index(np.isfinite(D.z))
        elif result['type'] == 'filtered_DEM':
            D = dict()
            D['x'], D['y'], D['z'] = read_DEM(this_file,
                                              asPoints=True,
                                              band_num=1,
                                              keepAll=True,
                                              bounds=bounds)
            # band 2 of a filtered DEM holds the error estimate
            D['x'], D['y'], D['sigma'] = read_DEM(this_file,
                                                  asPoints=True,
                                                  band_num=2,
                                                  keepAll=True,
                                                  bounds=bounds)
            D['time'] = np.zeros_like(D['x']) + WV_MatlabDate(this_file)
            D = point_data().from_dict(D)
            D.index(np.isfinite(D.z) & np.isfinite(D.sigma))
            D.filename = this_file
        elif result['type'] == 'indexed_h5':
            D = [
                read_indexed_h5_file(
                    this_file, [result['x'], result['y']],
                    fields=fields,
                    index_range=[result['offset_start'], result['offset_end']])
            ]
        elif result['type'] == 'indexed_h5_from_matlab':
            # matlab-style indexes store bin centers in km
            D = [
                read_indexed_h5_file(
                    this_file, [result['x'] / 1000, result['y'] / 1000],
                    fields=fields,
                    index_range=[result['offset_start'], result['offset_end']])
            ]
        elif result['type'] is None:
            D = [
                data.subset(np.arange(temp[0], temp[1]))
                for temp in zip(result['offset_start'], result['offset_end'])
            ]
        else:
            # unknown type: skip rather than appending a stale result
            continue
        # add data to the list of results.  May be a list or a single result
        if isinstance(D, list):
            for Di in D:
                if Di.filename is None:
                    Di.filename = this_file
            out_data += D
        else:
            if D.filename is None:
                D.filename = this_file
            out_data.append(D)
    return out_data
    # query ATL06 for the current bin, and index it.
    # NOTE(review): this span is a fragment of a larger loop body -- the
    # enclosing function and loop are outside this view, and the 'continue'
    # at the bottom refers to that loop.

    D6list = D6_GI.query_xy([[bin_xy[0]], [bin_xy[1]]],
                            get_data=True,
                            fields=ATL06_fields)
    if not isinstance(D6list, list):
        D6list = [D6list]
    for item in D6list:
        # broadcast the per-granule beam_pair and rgt scalars to per-point
        # arrays matching the latitude field
        item.BP = np.zeros_like(item.latitude) + item.beam_pair
        item.rgt = np.zeros_like(item.latitude) + item.rgt
        item.list_of_fields.append('BP')

    KK = ATL06_field_dict.copy()
    KK['Derived'] = ['BP']
    # NOTE(review): get_xy is called twice in this chain; the second call
    # looks redundant -- confirm it is a no-op before removing
    D6sub = ATL06_data(
        field_dict=KK).from_list(D6list).get_xy(SRS_proj4).get_xy(SRS_proj4)
    # round mean per-segment coordinates to a 100 m grid
    x0 = np.round(np.nanmean(D6sub.x, axis=1) / 100.) * 100
    y0 = np.round(np.nanmean(D6sub.y, axis=1) / 100.) * 100
    # sort primarily by x0 (multiples of 100), with y normalized to [0, 1]
    # acting as a tie-breaker
    iB = np.argsort(x0 + (y0 - y0.min()) / (y0.max() - y0.min()))
    D6sub.index(iB)
    # index the sorted ATL06 data at 100 m resolution
    GI_D6sub = geo_index(delta=[100, 100]).from_xy(
        [np.nanmean(D6sub.x, axis=1),
         np.nanmean(D6sub.y, axis=1)])

    # find the common bins at 100 m
    GI_D6sub, GI_Qsub = GI_D6sub.intersect(GI_Qsub, pad=[0, 1])
    if GI_D6sub is None:
        print("no intersections found for ATL06 bin %s\n" % (bin_name))
        continue
Example #4
0
    def for_file(self, filename, file_type, number=0, dir_root=''):
        '''
        Make a geo_index for file 'filename'.

        arguments:
            filename: file to index
            file_type: one of 'ATL06', 'ATL11', 'ATM_Qfit', 'ATM_waveform',
                'DEM', 'filtered_DEM', 'h5_geoindex', 'indexed_h5',
                'indexed_h5_from_matlab'
            number: file number recorded in the index attributes
            dir_root: directory root stripped from the stored filename

        returns: self
        '''
        if dir_root is not None:
            # eliminate the string in 'dir_root' from the filename
            filename_out = filename.replace(dir_root, '')
        else:
            # BUG FIX: filename_out was unbound when dir_root is None
            filename_out = filename
        if file_type in ['ATL06']:
            temp = list()
            this_field_dict = {
                None: ('latitude', 'longitude', 'h_li', 'delta_time')
            }
            for beam_pair in (1, 2, 3):
                D = ATL06_data(
                    beam_pair=beam_pair,
                    field_dict=this_field_dict).from_file(filename).get_xy(
                        self.attrs['SRS_proj4'])
                if D.latitude.shape[0] > 0:
                    temp.append(geo_index(delta=self.attrs['delta'], SRS_proj4=\
                                          self.attrs['SRS_proj4']).from_xy([np.nanmean(D.x, axis=1), np.nanmean(D.y, axis=1)], '%s:pair%d' % (filename_out, beam_pair), 'ATL06', number=number))
            self.from_list(temp)
        if file_type in ['ATL11']:
            temp = list()
            this_field_dict = {'corrected_h': ('ref_pt_lat', 'ref_pt_lon')}
            for beam_pair in (1, 2, 3):
                # get_xy is chained here; the original called it a second
                # time on the next line, which was redundant
                D = ATL11.data().from_file(filename,
                                           pair=beam_pair,
                                           field_dict=this_field_dict).get_xy(
                                               self.attrs['SRS_proj4'])
                if D.x.shape[0] > 0:
                    # BUG FIX: entries were recorded with type 'ATL06',
                    # which would make queries read these files with the
                    # ATL06 reader
                    temp.append(geo_index(delta=self.attrs['delta'], \
                                          SRS_proj4=self.attrs['SRS_proj4']).from_xy([D.x, D.y], '%s:pair%d' % (filename_out, beam_pair), 'ATL11', number=number))
            self.from_list(temp)

        if file_type in ['ATM_Qfit']:
            # BUG FIX: 'latitiude' was misspelled in the field list
            D = Qfit_data(filename=filename,
                          list_of_fields=['latitude', 'longitude', 'time'])
            if D.latitude.shape[0] > 0:
                self.from_latlon(D.latitude,
                                 D.longitude,
                                 filename_out,
                                 'ATM_Qfit',
                                 number=number)
        if file_type in ['ATM_waveform']:
            D = Qfit_data(filename=filename, waveform_format=True)
            if D.latitude.shape[0] > 0:
                self.from_latlon(D.latitude,
                                 D.longitude,
                                 filename_out,
                                 'ATM_waveform',
                                 number=number)
        if file_type in ['filtered_DEM', 'DEM']:
            D = dict()
            D['x'], D['y'], D['z'] = read_DEM(filename, asPoints=True)
            if D['x'].size > 0:
                self.from_xy((D['x'], D['y']),
                             filename=filename_out,
                             file_type=file_type,
                             number=number)
        if file_type in ['h5_geoindex']:
            # read the file as a collection of points
            temp_GI = geo_index().from_file(filename)
            xy_bin = temp_GI.bins_as_array()
            # loop over a minimal set of attributes:
            for attr in ['delta', 'SRS_proj4', 'dir_root']:
                if attr in temp_GI.attrs:
                    self.attrs[attr] = temp_GI.attrs[attr]
            self.attrs['file_%d' % number] = filename_out
            self.attrs['type_%d' % number] = file_type
            if dir_root is not None:
                self.attrs['dir_root'] = dir_root
            self.attrs['n_files'] = 1
            # fake_offset_val=-1: bins point back into the wrapped index
            self.from_xy(xy_bin,
                         filename=filename_out,
                         file_type=file_type,
                         number=number,
                         fake_offset_val=-1)
        if file_type in ['indexed_h5']:
            h5f = h5py.File(filename, 'r')
            if 'INDEX' in h5f:
                xy = [
                    np.array(h5f['INDEX']['bin_x']),
                    np.array(h5f['INDEX']['bin_y'])
                ]
                if 'bin_index' in h5f['INDEX']:
                    # this is the type of indexed h5 that has all of the
                    # data in single datasets
                    i0_i1 = h5f['INDEX']['bin_index']
                    first_last = [i0_i1[0, :].ravel(), i0_i1[1, :].ravel()]
                    fake_offset = None
                else:
                    first_last = None
                    fake_offset = -1
            else:
                # there is no index-- just a bunch of bins, maybe?
                first_last = None
                fake_offset = -1
                # group names of the form '<x>E_<y>N' encode bin centers
                bin_re = re.compile("(.*)E_(.*)N")
                xy = [[], []]
                for key in h5f:
                    m = bin_re.match(key)
                    if m is None:
                        continue
                    # BUG FIX: np.float was removed in NumPy 1.24
                    xy[0].append(float(m.group(1)))
                    xy[1].append(float(m.group(2)))
                xy[0] = np.array(xy[0])
                xy[1] = np.array(xy[1])
            self.from_xy(xy,
                         filename=filename_out,
                         file_type=file_type,
                         number=number,
                         first_last=first_last,
                         fake_offset_val=fake_offset)
            if dir_root is not None:
                self.attrs['dir_root'] = dir_root
            h5f.close()
        if file_type in ['indexed_h5_from_matlab']:
            h5f = h5py.File(filename, 'r')
            xy = [
                np.array(h5f['INDEX']['bin_x']),
                np.array(h5f['INDEX']['bin_y'])
            ]
            first_last = None
            fake_offset = -1
            self.from_xy(xy,
                         filename_out,
                         file_type,
                         number=number,
                         first_last=first_last,
                         fake_offset_val=fake_offset)
            h5f.close()

        return self
Example #5
0
def readPointData(args):
    '''
    Read point data from args.pointFile in the format selected by args.

    Flags consulted on args: dem, index, ATL06, ATL08, blockMedian,
    verbose; demFile is used for index queries.

    returns: dict of point-data dicts, keyed by filename (plus a ':gtNx'
    group suffix for ATL06 data).  Each dataset carries a 'name' entry.
    '''
    pointDataSets = dict()
    if args.dem:
        pointData = dict()
        pointData['x'], pointData['y'], pointData['h'] = read_DEM(
            args.pointFile, asPoints=True)
        pointDataSets[args.pointFile] = pointData
    elif args.index is True:
        pointDataSets = queryIndex(args.pointFile,
                                   args.demFile,
                                   verbose=args.verbose)
    elif args.ATL06 is False and args.ATL08 is False:
        # generic h5 point file: latitude/longitude or x/y, plus h
        pointData = dict()
        h5f = h5py.File(args.pointFile, 'r')
        try:
            pointData['latitude'] = np.array(h5f['latitude'])
            pointData['longitude'] = np.array(h5f['longitude'])
        except KeyError:
            pointData['x'] = np.array(h5f['x'])
            pointData['y'] = np.array(h5f['y'])
        pointData['h'] = np.array(h5f['h'])
        pointDataSets[args.pointFile] = pointData
        h5f.close()
    elif args.ATL06 is True:
        field_dict = {
            None:
            ['delta_time', 'h_li', 'h_li_sigma', 'latitude', 'longitude'],
            'geophysical': ['dac'],
            'fit_statistics': [
                'dh_fit_dx', 'h_rms_misfit', 'h_robust_sprd', 'n_fit_photons',
                'signal_selection_source', 'snr_significance',
                'w_surface_window_final', 'h_mean'
            ],
            'derived': ['valid']
        }

        beamPairs = [1, 2, 3]
        beams = ['l', 'r']
        pointData = dict()
        for beamPair in beamPairs:
            pairName = 'gt%d' % beamPair
            try:
                D6 = ATL06_data(beam_pair=beamPair,
                                field_dict=field_dict).from_file(
                                    args.pointFile)
                # drop low-photon-density segments
                f06.phDensityFilter(D6,
                                    toNaN=True,
                                    subset=True,
                                    minDensity={
                                        'weak': 0.5,
                                        'strong': 2
                                    })
                if D6.h_li.size == 0:
                    continue
                for ind, beam in enumerate(beams):
                    if D6.longitude.size < 50:
                        continue
                    group = pairName + beam
                    pointData['longitude'] = D6.longitude[:, ind].ravel()
                    pointData['latitude'] = D6.latitude[:, ind].ravel()
                    pointData['h'] = D6.h_mean[:, ind].ravel()
                    pointData['delta_time'] = D6.delta_time[:, ind].ravel()
                    these = np.where(np.isfinite(D6.latitude[:, ind]))[0]
                    if these.size > 2:
                        # BUG FIX: first/last were swapped (first was
                        # np.max(these)), which inverted the sign of the
                        # ascending/descending indicator
                        first = np.min(these)
                        last = np.max(these)
                        pointData['AD'] = np.sign(
                            D6.latitude[last, ind] -
                            D6.latitude[first, ind]) * np.ones_like(
                                pointData['h'])
                    if pointData['h'].size > 0:
                        pointDataSets[args.pointFile + ":" +
                                      group] = pointData.copy()
            except KeyError:
                print("pair %s not in %s\n" % (pairName, args.pointFile))
    if args.blockMedian is not None:
        # NOTE(review): assumes every dataset carries 'x' and 'y' keys;
        # ATL06 datasets built above only have latitude/longitude --
        # confirm before combining --ATL06 with --blockMedian
        for key in list(pointDataSets):
            pointDataSets[key]['x'], pointDataSets[key]['y'], pointDataSets[key]['h'] = \
                pt_blockmedian(pointDataSets[key]['x'], pointDataSets[key]['y'], pointDataSets[key]['h'], delta=args.blockMedian)
    # assign the 'name' property of each dataset
    for key in pointDataSets:
        pointDataSets[key]['name'] = key

    return pointDataSets