Example #1
def select_repeat_data(data, grids, repeat_dt, resolution):
    """
        Select data that are repeats
        
        input arguments:
            data: input data structure (must include x, y, and time fields)
            grids: dict of fd_grid objects (must include 'z0' and 'dz')
            repeat_dt: time interval by which repeats must be separated to count
            resolution: spatial resolution of repeat calculation
    """
    repeat_grid = fd_grid(grids['z0'].bds,
                          resolution * np.ones(2),
                          name='repeat')
    t_coarse = np.round(
        (data.time - grids['dz'].bds[2][0]) / repeat_dt) * repeat_dt
    grid_repeat_count = np.zeros(np.prod(repeat_grid.shape))
    for t_val in np.unique(t_coarse):
        # select the data points for each epoch
        ii = t_coarse == t_val
        # use the lin_op.interp_mtx to find the grid points associated with each node
        grid_repeat_count += np.asarray(
            lin_op(repeat_grid).interp_mtx((
                data.y[ii], data.x[ii])).toCSR().sum(axis=0) > 0.5).ravel()
    data_repeats = lin_op(repeat_grid).interp_mtx(
        (data.y, data.x)).toCSR().dot(
            (grid_repeat_count > 1).astype(np.float64))
    return data_repeats > 0.5
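A minimal numpy-only sketch of the idea behind select_repeat_data (collapse times to coarse epochs, then flag the cells that are sampled in more than one epoch), using a 1-D spatial coordinate and hypothetical values rather than the fd_grid/lin_op machinery:

import numpy as np

x = np.array([0.1, 0.2, 1.1, 1.2, 0.15])                 # positions (km), hypothetical
t = np.array([2003.1, 2004.0, 2003.2, 2003.3, 2004.1])   # times (yr), hypothetical
repeat_dt, resolution = 1.0, 1.0

t_coarse = np.round(t / repeat_dt) * repeat_dt            # collapse times to coarse epochs
cell = np.floor(x / resolution).astype(int)               # spatial bin for each point

# count, for each cell, how many distinct coarse epochs contain data
n_cells = cell.max() + 1
epoch_count = np.zeros(n_cells)
for t_val in np.unique(t_coarse):
    epoch_count += np.bincount(cell[t_coarse == t_val], minlength=n_cells) > 0

is_repeat = epoch_count[cell] > 1                         # flag data in multi-epoch cells
print(is_repeat)    # cell 0 is sampled in two epochs -> True for points 0, 1, and 4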
Example #2
def param_bias_matrix(data,
                      bias_model,
                      col_0=0,
                      bias_param_name='data_bias',
                      op_name='data_bias'):
    """
        Make a matrix that adds a set of parameters representing the biases of a set of data.
        
        input arguments:
             data: data for the problem.  Must contain a parameter with the name specified in 'bias_param_name'
             bias_model: bias_model dict from assign_bias_params             
             col_0: the first column of the matrix.
             bias_param_name: name of the parameter used to assign the biases.  Defaults to 'data_bias'
             op_name: the name for the output bias operator.
         output_arguments:
             G_bias: matrix that gives the biases for each parameter
             Gc_bias: matrix that gives the bias values (constraint matrix)
             E_bias: expected value for each bias parameter
             bias_model: bias model dict as defined in assign_bias_ID
    """
    col = getattr(data, bias_param_name).astype(int)
    col_N = col_0 + np.max(col) + 1
    G_bias = lin_op(name=op_name, col_0=col_0,
                    col_N=col_N).data_bias(np.arange(data.size),
                                           col=col_0 + col)
    # one constraint equation per bias parameter (0 ... max(col))
    ii = np.arange(col.max() + 1, dtype=int)
    Gc_bias = lin_op(name='constraint_' + op_name, col_0=col_0,
                     col_N=col_N).data_bias(ii, col=col_0 + ii)
    E_bias = [bias_model['E_bias'][ind] for ind in ii]
    for key in bias_model['bias_ID_dict']:
        bias_model['bias_ID_dict'][key]['col'] = col_0 + key
    return G_bias, Gc_bias, E_bias, bias_model
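The operator returned by param_bias_matrix places a single 1 in the bias column assigned to each data point, and the constraint operator is an identity over the bias columns. A sparse sketch of those two blocks without the lin_op wrapper, using a hypothetical bias_ID vector:

import numpy as np
import scipy.sparse as sp

bias_ID = np.array([0, 0, 1, 2, 1])     # hypothetical per-datum bias ID
col_0 = 10                              # first bias column in the full design matrix
n_bias = bias_ID.max() + 1

# G_bias: one equation per datum, a single 1 in that datum's bias column
rows = np.arange(bias_ID.size)
G_bias = sp.coo_matrix((np.ones(bias_ID.size), (rows, col_0 + bias_ID)),
                       shape=(bias_ID.size, col_0 + n_bias))

# Gc_bias: one constraint equation per bias parameter (an identity over the bias columns)
ii = np.arange(n_bias)
Gc_bias = sp.coo_matrix((np.ones(n_bias), (ii, col_0 + ii)),
                        shape=(n_bias, col_0 + n_bias))

print(G_bias.toarray()[:, col_0:])      # each row selects its own bias parameter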
Example #3
def parse_model(m, m0, G_data, G_dzbar, TOC, grids, bias_params, bias_model, dzdt_lags=None):

    # reshape the components of m to the grid shapes
    m['z0']=np.reshape(m0[TOC['cols']['z0']], grids['z0'].shape)
    m['dz']=np.reshape(m0[TOC['cols']['dz']], grids['dz'].shape)
    m['count']=np.reshape(np.array(G_data.toCSR().tocsc()[:,TOC['cols']['dz']].sum(axis=0)), grids['dz'].shape)
    
    # calculate height rates
    for lag in dzdt_lags:
        this_name='dzdt_lag%d' % lag
        m[this_name]=lin_op(grids['dz'], name='dzdt', col_N=G_data.col_N).dzdt(lag=lag).grid_prod(m0)
    
    # calculate the grid mean of dz
    m['dz_bar']=G_dzbar.dot(m0)

    # build a matrix that takes the lagged temporal derivative of dzbar (e.g. quarterly dzdt, annual dzdt)
    for lag in dzdt_lags:
        this_name='dzdt_bar_lag%d' % lag
        this_op=lin_op(grids['t'], name=this_name).diff(lag=lag).toCSR()
        # calculate the grid mean of dz/dt
        m[this_name]=this_op.dot(m['dz_bar'].ravel())

    # report the parameter biases.  Sorted in order of the parameter bias arguments
    m['bias'], m['slope_bias']=parse_biases(m0, bias_model, bias_params)

    # report the entire model vector, just in case we want it.
    m['all']=m0

    # report the geolocation of the output map
    m['extent']=np.concatenate((grids['z0'].bds[1], grids['z0'].bds[0]))
Example #4
def setup_PS_bias(data, G_data, constraint_op_list, grids, bds, args):
    '''
    set up a matrix to fit a smooth POCA-vs-Swath bias
    '''
    grids['PS_bias']=fd_grid( [bds['y'], bds['x']], \
       [args['spacing']['dz'], args['spacing']['dz']],\
       name='PS_bias', srs_proj4=args['srs_proj4'],\
       mask_file=args['mask_file'], mask_data=args['mask_data'], \
       col_0=grids['dz'].col_N)
    ps_mtx=lin_op(grid=grids['PS_bias'], name='PS_bias').\
        interp_mtx(data.coords()[0:2])
    # POCA rows should have zero entries
    temp = ps_mtx.v.ravel()
    temp[np.in1d(ps_mtx.r.ravel(), np.flatnonzero(data.swath == 0))] = 0
    ps_mtx.v = temp.reshape(ps_mtx.v.shape)
    G_data.add(ps_mtx)
    #Build a constraint matrix for the curvature of the PS bias
    grad2_ps = lin_op(grids['PS_bias'], name='grad2_PS').grad2(DOF='PS_bias')
    # scale the expected curvature by the cell dimensions, as for the other smoothness constraints
    grad2_ps.expected = (args['E_RMS_d2x_PS_bias'] + np.zeros(grad2_ps.N_eq)) /\
        np.sqrt(np.prod(grids['dz'].delta[0:2]))
    #Build a constraint matrix for the magnitude of the PS bias
    mag_ps=lin_op(grids['PS_bias'], name='mag_ps').data_bias(\
                ind=np.arange(grids['PS_bias'].N_nodes),
                col=np.arange(grids['PS_bias'].col_0, grids['PS_bias'].col_N))
    mag_ps.expected = args['E_RMS_PS_bias'] + np.zeros(mag_ps.N_eq)
    constraint_op_list.append(grad2_ps)
    #constraint_op_list.append(grad_ps)
    constraint_op_list.append(mag_ps)
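The masking step above zeroes every matrix entry whose row corresponds to a POCA (swath == 0) datum, so that only swath data see the PS-bias parameters. A numpy-only illustration of that np.in1d pattern with hypothetical indices:

import numpy as np

r = np.array([0, 1, 2, 3, 1])               # row index of each matrix entry (hypothetical)
v = np.array([0.2, 0.8, 1.0, 0.5, 0.2])     # corresponding interpolation weights
swath = np.array([0, 1, 1, 0])              # per-datum flag: 0 = POCA, 1 = swath

# zero the entries whose rows correspond to POCA data
v[np.in1d(r, np.flatnonzero(swath == 0))] = 0
print(v)    # [0.  0.8 1.  0.  0.2]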
Example #5
def setup_smoothness_constraints(grids, constraint_op_list, E_RMS, mask_scale):
    """
    Set up the smoothness constraint operators for dz and z0

    Inputs:
    grids: (dict) dictionary of fd_grid objects generated by setup_grids
    constraint_op_list: (list) list of lin_op objects containing constraint equations that penalize the solution for roughness.
    E_RMS: (dict) constraint weights.  May have entries: 'd2z0_dx2', 'dz0_dx', 'd3z_dx2dt', 'd2z_dxdt', 'd2z_dt2'.  Each specifies the penalty for the corresponding derivative of the DEM (z0) or of the height changes (dz)
    mask_scale: (dict) mapping between mask values (in grids[].mask) and constraint weights.  Keys and values should be floats

    Outputs:
    None (appends to constraint_op_list)
    """
    # make the smoothness constraints for z0
    root_delta_A_z0 = np.sqrt(np.prod(grids['z0'].delta))
    grad2_z0 = lin_op(grids['z0'], name='grad2_z0').grad2(DOF='z0')
    grad2_z0.expected = E_RMS[
        'd2z0_dx2'] / root_delta_A_z0 * grad2_z0.mask_for_ind0(mask_scale)

    constraint_op_list += [grad2_z0]
    if 'dz0_dx' in E_RMS:
        grad_z0 = lin_op(grids['z0'], name='grad_z0').grad(DOF='z0')
        grad_z0.expected = E_RMS[
            'dz0_dx'] / root_delta_A_z0 * grad_z0.mask_for_ind0(mask_scale)
        constraint_op_list += [grad_z0]

    # make the smoothness constraints for dz
    root_delta_V_dz = np.sqrt(np.prod(grids['dz'].delta))
    if 'd3z_dx2dt' in E_RMS and E_RMS['d3z_dx2dt'] is not None:
        grad2_dz = lin_op(grids['dz'], name='grad2_dzdt').grad2_dzdt(DOF='z',
                                                                     t_lag=1)
        grad2_dz.expected = E_RMS[
            'd3z_dx2dt'] / root_delta_V_dz * grad2_dz.mask_for_ind0(mask_scale)
        constraint_op_list += [grad2_dz]
    if 'd2z_dxdt' in E_RMS and E_RMS['d2z_dxdt'] is not None:
        grad_dzdt = lin_op(grids['dz'], name='grad_dzdt').grad_dzdt(DOF='z',
                                                                    t_lag=1)
        grad_dzdt.expected = E_RMS[
            'd2z_dxdt'] / root_delta_V_dz * grad_dzdt.mask_for_ind0(mask_scale)
        constraint_op_list += [grad_dzdt]
    if 'd2z_dt2' in E_RMS and E_RMS['d2z_dt2'] is not None:
        d2z_dt2 = lin_op(grids['dz'], name='d2z_dt2').d2z_dt2(DOF='z')
        d2z_dt2.expected = np.zeros(
            d2z_dt2.N_eq) + E_RMS['d2z_dt2'] / root_delta_V_dz
        constraint_op_list += [d2z_dt2]
    for constraint in constraint_op_list:
        if np.any(constraint.expected == 0):
            raise ValueError(
                f'found zero value in the expected values for {constraint.name}')
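For reference, setup_smoothness_constraints expects E_RMS keys that match the operators it builds; a dictionary of the form used in the glas_fit example (#16), whose values are specific to that problem and should be read as illustrative:

# expected-RMS value per constraint; each is divided by the square root of the
# cell area (or volume) inside setup_smoothness_constraints
E_RMS = {'d2z0_dx2': 20000. / 3000 / 3000,   # DEM curvature
         'd3z_dx2dt': 10. / 3000 / 3000,     # curvature of dz/dt
         'd2z_dxdt': 100 / 3000,             # slope of dz/dt
         'd2z_dt2': 1}                       # acceleration of dz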
Example #6
def setup_avg_mask_ops(grid, col_N, avg_masks, dzdt_lags):
    if avg_masks is None:
        return {}
    avg_ops = {}
    for name, mask in avg_masks.items():
        this_name = name + '_avg_dz'
        avg_ops[this_name] = lin_op(grid, col_N=col_N,
                                    name=this_name).mean_of_mask(mask,
                                                                 dzdt_lag=None)
        for lag in dzdt_lags:
            this_name = name + f'_avg_dzdt_lag{lag}'
            avg_ops[this_name] = lin_op(
                grid, col_N=col_N, name=this_name).mean_of_mask(mask,
                                                                dzdt_lag=lag)
    return avg_ops
Example #7
def data_slope_bias(data,  bias_model, col_0=0, sensors=[], op_name='data_slope'):
    """
        Make a matrix that adds a set of parameters representing the (x, y) slope biases for a set of sensors.

        input arguments:
             data: data for the problem.  Must contain parameters 'x', 'y', and 'sensor'
             bias_model: bias_model dict from assign_bias_params
             col_0: the first column of the matrix.
             sensors: list of sensor values for which slope biases will be estimated
             op_name: the name for the output bias operator.
         output_arguments:
             G_bias: matrix that gives the biases for each parameter
             Gc_bias: matrix that gives the bias values (constraint matrix)
             E_bias: expected value for each bias parameter
             bias_model: bias model dict as defined in assign_bias_ID
    """
    if 'slope_bias_dict' not in bias_model:
        bias_model['slope_bias_dict']={}

    these_sensors=sensors[np.in1d(sensors, data.sensor)]

    col_N=col_0+2*len(these_sensors)
    rr, cc, vv=[[], [], []]
    
    for d_col_1, sensor in enumerate(these_sensors):
        rows=np.flatnonzero(data.sensor==sensor)
        bias_model['slope_bias_dict'][sensor]=col_0+d_col_1*2+np.array([0, 1])
        for d_col_2, var in enumerate(['x', 'y']):
            delta=getattr(data, var)[rows]
            delta -= np.nanmean(delta)
            rr += [rows]
            cc += [np.zeros_like(rows, dtype=int) + int(col_0+d_col_1*2+d_col_2)]
            vv += [delta/1000]

    G_bias = lin_op(col_0=col_0, col_N=col_N, name=op_name)
    G_bias.r = np.concatenate(rr)
    G_bias.c = np.concatenate(cc)
    G_bias.v = np.concatenate(vv)

    ii=np.arange(col_0, col_N, dtype=int)
    Gc_bias=lin_op(name='constraint_'+op_name, col_0=col_0, col_N=col_N).data_bias(ii-ii[0], col=ii)
    E_bias=bias_model['E_slope']+np.zeros(2*len(these_sensors))

    return G_bias, Gc_bias, E_bias, bias_model
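data_slope_bias allocates two columns per sensor that hold the de-meaned x and y offsets of each point, scaled to kilometers, so the two estimated parameters act as per-kilometer slope corrections. A numpy sketch of the column values for one hypothetical sensor:

import numpy as np

# hypothetical coordinates (m) for the data points of a single sensor
x = np.array([1000., 1500., 2000., 2500.])
y = np.array([200., 220., 180., 210.])

dx = (x - np.nanmean(x)) / 1000.    # column multiplying the x-slope bias (per km)
dy = (y - np.nanmean(y)) / 1000.    # column multiplying the y-slope bias (per km)

# a fitted slope bias (mx, my) contributes mx*dx + my*dy to each datum
mx, my = 0.5, -0.2                  # hypothetical slope-bias values (m/km)
print(mx * dx + my * dy)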
Example #8
def setup_bias_fit(data,
                   bias_model,
                   G_data,
                   constraint_op_list,
                   bias_param_name='data_bias',
                   op_name='data_bias'):
    """
        Set up a set of parameters representing the biases of a set of data

        input arguments:
             data: data for the problem.  Must contain a parameter with the name specified in 'bias_param_name'
             bias_model: bias_model dict from assign_bias_params
             G_data: coefficient matrix for least-squares fit.  New bias parameters will be added to right of existing parameters
             constraint_op_list: list of constraint-equation operators
             bias_param_name: name of the parameter used to assign the biases.  Defaults to 'data_bias'
             op_name: the name for the output bias operator.
    """
    # the field in bias_param_name defines the relative column in the bias matrix
    # for the DOF constrained
    col = getattr(data, bias_param_name).astype(int)
    # the new columns are appended to the right of G_data
    col_0 = G_data.col_N
    col_N = G_data.col_N + np.max(col) + 1
    # The bias matrix is just a 1 in the column for the bias parameter for each
    # data value
    G_bias = lin_op(name=op_name, col_0=col_0,
                    col_N=col_N).data_bias(np.arange(data.size),
                                           col=col_0 + col)
    # the constraint matrix has a 1 for each column, is zero otherwise
    ii = np.arange(col.max() + 1, dtype=int)
    Gc_bias = lin_op(name='constraint_' + op_name, col_0=col_0,
                     col_N=col_N).data_bias(ii, col=col_0 + ii)
    for key in bias_model['bias_ID_dict']:
        bias_model['bias_ID_dict'][key]['col'] = col_0 + key
    # the confidence for each bias parameter being zero is in bias_model['E_bias']
    Gc_bias.expected = np.array([bias_model['E_bias'][ind] for ind in ii])
    if np.any(Gc_bias.expected == 0):
        raise ValueError('found a zero value in the expected biases')
    constraint_op_list.append(Gc_bias)
    G_data.add(G_bias)
Example #9
def parse_errors(E, Gcoo, TCinv, rhs, Ip_c, Ip_r, grids, G_data, Gc, G_dzbar, bias_model, bias_params, dzdt_lags=None, timing={}):
    tic=time()
    # take the QZ transform of Gcoo  # TEST WHETHER rhs can just be a vector of ones
    z, R, perm, rank=sparseqr.rz(Ip_r.dot(TCinv.dot(Gcoo)), Ip_r.dot(TCinv.dot(rhs)))
    z=z.ravel()
    R=R.tocsr()
    R.sort_indices()
    R.eliminate_zeros()
    timing['decompose_qz']=time()-tic

    E0=np.zeros(R.shape[0])

    # compute Rinv for use in propagating errors.
    # what should the tolerance be?  We will eventually square Rinv and take its
    # row-wise sum.  We care about errors at the cm level, so
    # size(Rinv)*tol^2 = 0.01 -> tol=sqrt(0.01/size(Rinv))~ 1E-4
    tic = time()
    RR, CC, VV, status = inv_tr_upper(R, int(np.prod(R.shape)/4), 1.e-5)
    # save Rinv as a sparse array.  The syntax perm[RR] undoes the permutation from QZ
    Rinv = sp.coo_matrix((VV, (perm[RR], CC)), shape=R.shape).tocsr()
    timing['Rinv_cython'] = time() - tic
    tic = time()
    E0 = np.sqrt(Rinv.power(2).sum(axis=1))
    timing['propagate_errors'] = time() - tic
    
    # generate the full E vector.  E0 comes back as a numpy matrix, so cast it to a flat ndarray
    E0 = np.array(Ip_c.dot(E0)).ravel()
    E['z0']=np.reshape(E0[Gc.TOC['cols']['z0']], grids['z0'].shape)
    E['dz']=np.reshape(E0[Gc.TOC['cols']['dz']], grids['dz'].shape)

    # generate the lagged dz errors:

    for lag in dzdt_lags:
        this_name='dzdt_lag%d' % lag
        E[this_name]=lin_op(grids['dz'], name=this_name, col_N=G_data.col_N).dzdt(lag=lag).grid_error(Ip_c.dot(Rinv))
        # note: this should probably be dotted with the G_dzbar op.  lag op is nlag*nt, G_dzbar is nt*ncols, R is NcolsxNcols RUN ONE LINE AT A TIME
        this_name='dzdt_bar_lag%d' % lag
        this_op=lin_op(grids['t'], name=this_name).diff(lag=lag).toCSR().dot(G_dzbar)
        E[this_name]=np.sqrt((this_op.dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))
         
    # generate the grid-mean error for zero lag  
    E['dz_bar']=np.sqrt((G_dzbar.dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))
    
    E['bias'], E['slope_bias']=parse_biases(E0, bias_model, bias_params)
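The propagation step above relies on the identity that, for a whitened least-squares system with QR factorization A = QR, the parameter covariance is (A^T A)^{-1} = R^{-1} R^{-T}, so the 1-sigma errors are the row-wise root-sum-of-squares of R^{-1} (what Rinv.power(2).sum(axis=1) computes). A dense numpy check of that identity on a small random system:

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((20, 5))             # whitened design matrix (TCinv already applied)

Q, R = np.linalg.qr(A)                       # dense stand-in for the sparse factorization
Rinv = np.linalg.inv(R)

sigma_qr = np.sqrt((Rinv**2).sum(axis=1))    # row-wise RSS of R^{-1}
sigma_cov = np.sqrt(np.diag(np.linalg.inv(A.T @ A)))   # direct covariance diagonal
print(np.allclose(sigma_qr, sigma_cov))      # True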
Example #10
def sum_cell_area(grid_f,
                  grid_c,
                  cell_area_f=None,
                  return_op=False,
                  sub0s=None,
                  taper=True):
    # calculate the total masked area of the fine-grid cells that fall within each coarse-grid cell
    if cell_area_f is None:
        cell_area_f = calc_cell_area(grid_f) * grid_f.mask
    n_k = (grid_c.delta[0:2] / grid_f.delta[0:2] + 1).astype(int)
    temp_grid = fd_grid((grid_f.bds[0:2]), deltas=grid_f.delta[0:2])
    fine_to_coarse = lin_op(grid=temp_grid).sum_to_grid3(
        n_k, sub0s=sub0s, taper=taper, valid_equations_only=False, dims=[0, 1])
    result = fine_to_coarse.toCSR().dot(cell_area_f.ravel()).reshape(
        grid_c.shape[0:2])
    if return_op:
        return result, fine_to_coarse
    return result
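When the coarse spacing is an integer multiple of the fine spacing and no tapering is involved, the bookkeeping that sum_cell_area delegates to sum_to_grid3 reduces to a block sum, which can be sketched with a reshape:

import numpy as np

fine_area = np.full((8, 8), 100.0)   # hypothetical fine-grid cell areas (m^2)
fine_area[:, :2] = 0.0               # a masked-out strip
block = 4                            # one coarse cell spans 4 x 4 fine cells

coarse_area = fine_area.reshape(2, block, 2, block).sum(axis=(1, 3))
print(coarse_area)                   # [[ 800. 1600.] [ 800. 1600.]]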
Example #11
def setup_mask(data, grids, valid_data, bds, args):
    '''
    Mark datapoints for which the mask is zero as invalid
    Inputs:
    data: (pc.data) data structure.
    grids: (dict) dictionary of fd_grid objects generated by setup_grids
    valid_data: (numpy boolean array, size(data)) indicates valid data points
    bds: (dict) a dictionary specifying the domain bounds in x, y, and t (2-element vector for each)

    '''

    temp = fd_grid([bds['y'], bds['x']],
                   [args['spacing']['z0'], args['spacing']['z0']],
                   name='z0',
                   srs_proj4=args['srs_proj4'],
                   mask_file=args['mask_file'],
                   mask_data=args['mask_data'])
    data_mask = lin_op(temp, name='interp_z').interp_mtx(
        data.coords()[0:2]).toCSR().dot(grids['z0'].mask.ravel())
    data_mask[~np.isfinite(data_mask)] = 0
    if np.any(data_mask == 0):
        data.index(~(data_mask == 0))
        valid_data[valid_data] = ~(data_mask == 0)
Example #12
def smooth_xyt_fit(**kwargs):
    required_fields = ('data', 'W', 'ctr', 'spacing', 'E_RMS')
    args = {
        'reference_epoch': 0,
        'W_ctr': 1e4,
        'mask_file': None,
        'mask_scale': None,
        'compute_E': False,
        'max_iterations': 10,
        'srs_WKT': None,
        'N_subset': None,
        'bias_params': None,
        'repeat_res': None,
        'repeat_dt': 1,
        'Edit_only': False,
        'dzdt_lags': [1, 4],
        'VERBOSE': True
    }
    args.update(kwargs)
    for field in required_fields:
        if field not in kwargs:
            raise ValueError("%s must be defined" % field)
    valid_data = np.ones_like(args['data'].x, dtype=bool)
    timing = dict()

    if args['N_subset'] is not None:
        tic = time()
        valid_data = edit_data_by_subset_fit(args['N_subset'], args)
        timing['edit_by_subset'] = time() - tic
        if args['Edit_only']:
            return {
                'timing': timing,
                'data': args['data'].copy().subset(valid_data)
            }
    m = dict()
    E = dict()

    # define the grids
    tic = time()
    bds = {
        coord: args['ctr'][coord] + np.array([-0.5, 0.5]) * args['W'][coord]
        for coord in ('x', 'y', 't')
    }
    grids = dict()
    grids['z0'] = fd_grid([bds['y'], bds['x']],
                          args['spacing']['z0'] * np.ones(2),
                          name='z0',
                          srs_WKT=args['srs_WKT'],
                          mask_file=args['mask_file'])
    grids['dz']=fd_grid( [bds['y'], bds['x'], bds['t']], \
        [args['spacing']['dz'], args['spacing']['dz'], args['spacing']['dt']], col_0=grids['z0'].N_nodes, name='dz', srs_WKT=args['srs_WKT'], mask_file=args['mask_file'])
    grids['z0'].col_N = grids['dz'].col_N
    grids['t'] = fd_grid([bds['t']], [args['spacing']['dt']], name='t')

    # select only the data points that are within the grid bounds
    valid_z0 = grids['z0'].validate_pts((args['data'].coords()[0:2]))
    valid_dz = grids['dz'].validate_pts((args['data'].coords()))
    valid_data = valid_data & valid_dz & valid_z0

    # if repeat_res is given, resample the data to include only repeat data (to within a spatial tolerance of repeat_res)
    if args['repeat_res'] is not None:
        valid_data[valid_data]=valid_data[valid_data] & \
            select_repeat_data(args['data'].copy().subset(valid_data), grids, args['repeat_dt'], args['repeat_res'])

    # subset the data based on the valid mask
    data = args['data'].copy().subset(valid_data)

    # if we have a mask file, use it to subset the data
    # needs to be done after the valid subset because otherwise the interp_mtx for the mask file fails.
    if args['mask_file'] is not None:
        temp = fd_grid([bds['y'], bds['x']],
                       [args['spacing']['z0'], args['spacing']['z0']],
                       name='z0',
                       srs_WKT=args['srs_WKT'],
                       mask_file=args['mask_file'])
        data_mask = lin_op(temp, name='interp_z').interp_mtx(
            data.coords()[0:2]).toCSR().dot(grids['z0'].mask.ravel())
        data_mask[~np.isfinite(data_mask)] = 0
        if np.any(data_mask == 0):
            data.subset(~(data_mask == 0))
            valid_data[valid_data] = ~(data_mask == 0)

    # define the interpolation operator, equal to the sum of the dz and z0 operators
    G_data = lin_op(grids['z0'],
                    name='interp_z').interp_mtx(data.coords()[0:2])
    G_data.add(lin_op(grids['dz'], name='interp_dz').interp_mtx(data.coords()))

    # define the smoothness constraints
    grad2_z0 = lin_op(grids['z0'], name='grad2_z0').grad2(DOF='z0')
    grad2_dz = lin_op(grids['dz'], name='grad2_dzdt').grad2_dzdt(DOF='z',
                                                                 t_lag=1)
    grad_dzdt = lin_op(grids['dz'], name='grad_dzdt').grad_dzdt(DOF='z',
                                                                t_lag=1)
    constraint_op_list = [grad2_z0, grad2_dz, grad_dzdt]
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        d2z_dt2 = lin_op(grids['dz'], name='d2z_dt2').d2z_dt2(DOF='z')
        constraint_op_list.append(d2z_dt2)

    # if bias params are given, create a set of parameters to estimate them
    if args['bias_params'] is not None:
        data, bias_model = assign_bias_ID(data, args['bias_params'])
        G_bias, Gc_bias, Cvals_bias, bias_model = param_bias_matrix(
            data,
            bias_model,
            bias_param_name='bias_ID',
            col_0=grids['dz'].col_N)
        G_data.add(G_bias)
        constraint_op_list.append(Gc_bias)

    # put the equations together
    Gc = lin_op(None, name='constraints').vstack(constraint_op_list)
    N_eq = G_data.N_eq + Gc.N_eq

    # put together all the errors
    Ec = np.zeros(Gc.N_eq)
    root_delta_V_dz = np.sqrt(np.prod(grids['dz'].delta))
    root_delta_A_z0 = np.sqrt(np.prod(grids['z0'].delta))
    Ec[Gc.TOC['rows']['grad2_z0']] = args['E_RMS'][
        'd2z0_dx2'] / root_delta_A_z0 * grad2_z0.mask_for_ind0(
            args['mask_scale'])
    Ec[Gc.TOC['rows']['grad2_dzdt']] = args['E_RMS'][
        'd3z_dx2dt'] / root_delta_V_dz * grad2_dz.mask_for_ind0(
            args['mask_scale'])
    Ec[Gc.TOC['rows']['grad_dzdt']] = args['E_RMS'][
        'd2z_dxdt'] / root_delta_V_dz * grad_dzdt.mask_for_ind0(
            args['mask_scale'])
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        Ec[Gc.TOC['rows']
           ['d2z_dt2']] = args['E_RMS']['d2z_dt2'] / root_delta_V_dz
    if args['bias_params'] is not None:
        Ec[Gc.TOC['rows'][Gc_bias.name]] = Cvals_bias
    Ed = data.sigma.ravel()
    # calculate the inverse square root of the data covariance matrix
    TCinv = sp.dia_matrix((1. / np.concatenate((Ed, Ec)), 0),
                          shape=(N_eq, N_eq))

    # define the right hand side of the equation
    rhs = np.zeros([N_eq])
    rhs[0:data.size] = data.z.ravel()

    # put the fit and constraint matrices together
    Gcoo = sp.vstack([G_data.toCSR(), Gc.toCSR()]).tocoo()
    cov_rows = G_data.N_eq + np.arange(Gc.N_eq)

    # define the matrix that sets dz[reference_epoch]=0 by removing columns from the solution:
    # Identify the rows and columns that match the reference epoch
    temp_r, temp_c = np.meshgrid(np.arange(0, grids['dz'].shape[0]),
                                 np.arange(0, grids['dz'].shape[1]))
    z02_mask = grids['dz'].global_ind([
        temp_r.transpose().ravel(),
        temp_c.transpose().ravel(),
        args['reference_epoch'] + np.zeros_like(temp_r).ravel()
    ])

    # Identify all of the DOFs that do not include the reference epoch
    cols = np.arange(G_data.col_N, dtype='int')
    include_cols = np.setdiff1d(cols, z02_mask)
    # Generate a matrix that has diagonal elements corresponding to all DOFs except the reference epoch.
    # Multiplying this by a matrix with columns for all model parameters yields a matrix with no columns
    # corresponding to the reference epoch.
    Ip_c = sp.coo_matrix((np.ones_like(include_cols),
                          (include_cols, np.arange(include_cols.size))),
                         shape=(Gc.col_N, include_cols.size)).tocsc()

    # eliminate the columns for the model variables that are set to zero
    Gcoo = Gcoo.dot(Ip_c)
    timing['setup'] = time() - tic

    if np.any(data.z > 2500):
        print('outlier!')
    # initialize the book-keeping matrices for the inversion
    m0 = np.zeros(Ip_c.shape[0])
    if "three_sigma_edit" in data.list_of_fields:
        inTSE = np.where(data.three_sigma_edit)[0]
    else:
        inTSE = np.arange(G_data.N_eq, dtype=int)
    if args['VERBOSE']:
        print("initial: %d:" % G_data.r.max())
    tic_iteration = time()
    for iteration in range(args['max_iterations']):
        # build the parsing matrix that removes invalid rows
        Ip_r = sp.coo_matrix(
            (np.ones(Gc.N_eq + inTSE.size),
             (np.arange(Gc.N_eq + inTSE.size), np.concatenate(
                 (inTSE, cov_rows)))),
            shape=(Gc.N_eq + inTSE.size, Gcoo.shape[0])).tocsc()

        m0_last = m0
        if args['VERBOSE']:
            print("starting qr solve for iteration %d" % iteration)
        # solve the equations
        tic = time()
        m0 = Ip_c.dot(
            sparseqr.solve(Ip_r.dot(TCinv.dot(Gcoo)),
                           Ip_r.dot(TCinv.dot(rhs))))
        timing['sparseqr_solve'] = time() - tic

        # quit if the solution is too similar to the previous solution
        if (np.max(np.abs(
            (m0_last - m0)[Gc.TOC['cols']['dz']])) < 0.05) and (iteration > 2):
            break

        # calculate the full data residual
        rs_data = (data.z - G_data.toCSR().dot(m0)) / data.sigma
        # calculate the robust standard deviation of the scaled residuals for the selected data
        sigma_hat = RDE(rs_data[inTSE])
        inTSE_last = inTSE
        # select the data that are within 3*sigma of the solution
        inTSE = np.where(np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat))[0]
        if args['VERBOSE']:
            print('found %d in TSE, sigma_hat=%3.3f' % (inTSE.size, sigma_hat))
        if (sigma_hat <= 1 or
            (inTSE.size == inTSE_last.size
             and np.all(inTSE_last == inTSE))) and (iteration > 2):
            if args['VERBOSE']:
                print("sigma_hat LT 1, exiting")
            break
    timing['iteration'] = time() - tic_iteration
    inTSE = inTSE_last
    valid_data[valid_data] = (np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat))
    data.assign(
        {'three_sigma_edit': np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat)})
    # report the model-based estimate of the data points
    data.assign({'z_est': np.reshape(G_data.toCSR().dot(m0), data.shape)})

    # reshape the components of m to the grid shapes
    m['z0'] = np.reshape(m0[Gc.TOC['cols']['z0']], grids['z0'].shape)
    m['dz'] = np.reshape(m0[Gc.TOC['cols']['dz']], grids['dz'].shape)

    # calculate height rates
    for lag in args['dzdt_lags']:
        this_name = 'dzdt_lag%d' % lag
        m[this_name] = lin_op(grids['dz'], name='dzdt',
                              col_N=G_data.col_N).dzdt(lag=lag).grid_prod(m0)

    # build a matrix that takes the average of the central 20 km of the delta-z grid
    XR = np.mean(grids['z0'].bds[0]) + np.array([-1., 1.]) * args['W_ctr'] / 2.
    YR = np.mean(grids['z0'].bds[1]) + np.array([-1., 1.]) * args['W_ctr'] / 2.
    center_dzbar = lin_op(grids['dz'], name='center_dzbar',
                          col_N=G_data.col_N).vstack([
                              lin_op(grids['dz']).mean_of_bounds(
                                  (XR, YR, [season, season]))
                              for season in grids['dz'].ctrs[2]
                          ])
    G_dzbar = center_dzbar.toCSR()
    # calculate the grid mean of dz
    m['dz_bar'] = G_dzbar.dot(m0)

    # build a matrix that takes the lagged temporal derivative of dzbar (e.g. quarterly dzdt, annual dzdt)
    for lag in args['dzdt_lags']:
        this_name = 'dzdt_bar_lag%d' % lag
        this_op = lin_op(grids['t'], name=this_name).diff(lag=lag).toCSR()
        # calculate the grid mean of dz/dt
        m[this_name] = this_op.dot(m['dz_bar'].ravel())

    # report the parameter biases.  Sorted in order of the parameter bias arguments
    if args['bias_params'] is not None:
        m['bias'] = parse_biases(m0, bias_model['bias_ID_dict'],
                                 args['bias_params'])

    # report the entire model vector, just in case we want it.

    m['all'] = m0

    # report the geolocation of the output map
    m['extent'] = np.concatenate((grids['z0'].bds[1], grids['z0'].bds[0]))

    # parse the resduals to assess the contributions of the total error:
    # Make the C matrix for the constraints
    TCinv_cov = sp.dia_matrix((1. / Ec, 0), shape=(Gc.N_eq, Gc.N_eq))
    rc = TCinv_cov.dot(Gc.toCSR().dot(m0))
    ru = Gc.toCSR().dot(m0)
    R = dict()
    RMS = dict()
    for eq_type in ['d2z_dt2', 'grad2_z0', 'grad2_dzdt']:
        if eq_type in Gc.TOC['rows']:
            R[eq_type] = np.sum(rc[Gc.TOC['rows'][eq_type]]**2)
            RMS[eq_type] = np.sqrt(np.mean(ru[Gc.TOC['rows'][eq_type]]**2))
    R['data'] = np.sum(((data.z_est - data.z) / data.sigma)**2)
    RMS['data'] = np.sqrt(np.mean((data.z_est - data.z)**2))

    # if we need to compute the errors in the solution, continue
    if args['compute_E']:
        tic = time()
        # take the QZ transform of Gcoo
        z, R, perm, rank = sparseqr.qz(Ip_r.dot(TCinv.dot(Gcoo)),
                                       Ip_r.dot(TCinv.dot(rhs)))
        z = z.ravel()
        R = R.tocsr()
        R.sort_indices()
        R.eliminate_zeros()
        timing['decompose_qz'] = time() - tic

        E0 = np.zeros(R.shape[0])

        # compute Rinv for use in propagating errors.
        # what should the tolerance be?  We will eventually square Rinv and take its
        # row-wise sum.  We care about errors at the cm level, so
        # size(Rinv)*tol^2 = 0.01 -> tol=sqrt(0.01/size(Rinv))~ 1E-4
        tic = time()
        RR, CC, VV, status = inv_tr_upper(R, int(np.prod(R.shape) / 4),
                                          1.e-5)
        # save Rinv as a sparse array.  The syntax perm[RR] undoes the permutation from QZ
        Rinv = sp.coo_matrix((VV, (perm[RR], CC)), shape=R.shape).tocsr()
        timing['Rinv_cython'] = time() - tic
        tic = time()
        E0 = np.sqrt(Rinv.power(2).sum(axis=1))
        timing['propagate_errors'] = time() - tic

        # generate the full E vector.  E0 comes back as a numpy matrix, so cast it to a flat ndarray
        E0 = np.array(Ip_c.dot(E0)).ravel()
        E['z0'] = np.reshape(E0[Gc.TOC['cols']['z0']], grids['z0'].shape)
        E['dz'] = np.reshape(E0[Gc.TOC['cols']['dz']], grids['dz'].shape)

        # generate the lagged dz errors:

        for lag in args['dzdt_lags']:
            this_name = 'dzdt_lag%d' % lag
            E[this_name] = lin_op(grids['dz'],
                                  name=this_name,
                                  col_N=G_data.col_N).dzdt(lag=lag).grid_error(
                                      Ip_c.dot(Rinv))

            this_name = 'dzdt_bar_lag%d' % lag
            this_op = lin_op(grids['t'], name=this_name).diff(lag=lag).toCSR()
            E[this_name] = np.sqrt(
                (this_op.dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))
        # calculate the grid mean of dz/dt

        # generate the season-to-season errors
        #E['dzdt_qyr']=lin_op(grids['dz'], name='dzdt_1yr', col_N=G_data.col_N).dzdt().grid_error(Ip_c.dot(Rinv))

        # generate the annual errors
        #E['dzdt_1yr']=lin_op(grids['dz'], name='dzdt_1yr', col_N=G_data.col_N).dzdt(lag=4).grid_error(Ip_c.dot(Rinv))

        # generate the grid-mean error
        E['dz_bar'] = np.sqrt(
            (G_dzbar.dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))

        # generate the grid-mean quarterly dzdt error
        #E['dzdt_bar_qyr']=np.sqrt((ddt_qyr.dot(G_dzbar).dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))

        # generate the grid-mean annual dzdt error
        #E['dzdt_bar_1yr']=np.sqrt((ddt_1yr.dot(G_dzbar).dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))

        # report the rgt bias errors.  Sorted by RGT, then by  cycle
        if args['bias_params'] is not None:
            E['bias'] = parse_biases(E0, bias_model['bias_ID_dict'],
                                     args['bias_params'])

    TOC = Gc.TOC
    return {
        'm': m,
        'E': E,
        'data': data,
        'grids': grids,
        'valid_data': valid_data,
        'TOC': TOC,
        'R': R,
        'RMS': RMS,
        'timing': timing,
        'E_RMS': args['E_RMS']
    }
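The editing loop in smooth_xyt_fit refits, computes scaled residuals, estimates a robust dispersion (sigma_hat), and keeps data with |residual| < 3*max(1, sigma_hat) until the selection stops changing or sigma_hat drops to 1. A self-contained 1-D sketch of that loop on synthetic data, assuming RDE is the half-width of the central 68% of the residuals:

import numpy as np

def rde(x):
    # robust dispersion estimate (assumed: half-width of the central 68%)
    p16, p84 = np.percentile(x, [16, 84])
    return (p84 - p16) / 2

rng = np.random.default_rng(1)
z = 5.0 + rng.standard_normal(200)           # data scattered around a constant surface
z[:10] += 50.0                               # gross outliers
sigma = np.ones_like(z)

keep = np.ones(z.size, dtype=bool)
for iteration in range(10):
    m0 = np.mean(z[keep])                    # trivial "fit": a single mean value
    rs = (z - m0) / sigma                    # scaled residuals for all data
    sigma_hat = rde(rs[keep])
    keep_new = np.abs(rs) < 3.0 * np.maximum(1, sigma_hat)
    if iteration > 2 and (sigma_hat <= 1 or np.array_equal(keep_new, keep)):
        break
    keep = keep_new

print("kept %d of %d, sigma_hat=%3.3f" % (keep.sum(), z.size, sigma_hat))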
Example #13
def smooth_xytb_fit(**kwargs):
    required_fields = ('data', 'W', 'ctr', 'spacing', 'E_RMS')
    args = {
        'reference_epoch': 0,
        'W_ctr': 1e4,
        'mask_file': None,
        'mask_data': None,
        'mask_scale': None,
        'compute_E': False,
        'max_iterations': 10,
        'srs_proj4': None,
        'N_subset': None,
        'bias_params': None,
        'bias_filter': None,
        'repeat_res': None,
        'converge_tol_dz': 0.05,
        'DEM_tol': None,
        'repeat_dt': 1,
        'Edit_only': False,
        'dzdt_lags': None,
        'avg_scales': [],
        'data_slope_sensors': None,
        'E_slope': 0.05,
        'E_RMS_d2x_PS_bias': None,
        'E_RMS_PS_bias': None,
        'error_res_scale': None,
        'avg_masks': None,
        'grid_bias_model_args': None,
        'bias_nsigma_edit': None,
        'bias_nsigma_iteration': 2,
        'VERBOSE': True
    }
    args.update(kwargs)
    for field in required_fields:
        if field not in kwargs:
            raise ValueError("%s must be defined" % field)
    valid_data = np.isfinite(
        args['data'].z)  #np.ones_like(args['data'].x, dtype=bool)
    timing = dict()

    if args['N_subset'] is not None:
        tic = time()
        valid_data &= edit_data_by_subset_fit(args['N_subset'], args)
        timing['edit_by_subset'] = time() - tic
        if args['Edit_only']:
            return {'timing': timing, 'data': args['data'].copy()[valid_data]}
    m = {}
    E = {}
    R = {}
    RMS = {}
    tic = time()
    # define the grids
    grids, bds = setup_grids(args)

    #print("\nstarting smooth_xytb_fit")
    #summarize_time(args['data'], grids['dz'].ctrs[2], np.ones(args['data'].shape, dtype=bool))

    # select only the data points that are within the grid bounds
    valid_z0 = grids['z0'].validate_pts((args['data'].coords()[0:2]))
    valid_dz = grids['dz'].validate_pts((args['data'].coords()))
    valid_data = valid_data & valid_dz & valid_z0

    if not np.any(valid_data):
        if args['VERBOSE']:
            print("smooth_xytb_fit: no valid data")
        return {
            'm': m,
            'E': E,
            'data': None,
            'grids': grids,
            'valid_data': valid_data,
            'TOC': {},
            'R': {},
            'RMS': {},
            'timing': timing,
            'E_RMS': args['E_RMS']
        }

    # subset the data based on the valid mask
    data = args['data'].copy_subset(valid_data)

    #print("\n\nafter validation")
    #summarize_time(data, grids['dz'].ctrs[2], np.ones(data.shape, dtype=bool))

    # if we have a mask file, use it to subset the data
    # needs to be done after the valid subset because otherwise the interp_mtx for the mask file fails.
    if args['mask_file'] is not None or args['mask_data'] is not None:
        setup_mask(data, grids, valid_data, bds, args)

    # Check if we have any data.  If not, quit
    if data.size == 0:
        if args['VERBOSE']:
            print("smooth_xytb_fit: no valid data")
        return {
            'm': m,
            'E': E,
            'data': data,
            'grids': grids,
            'valid_data': valid_data,
            'TOC': {},
            'R': {},
            'RMS': {},
            'timing': timing,
            'E_RMS': args['E_RMS']
        }

    # define the interpolation operator, equal to the sum of the dz and z0 operators
    G_data = lin_op(grids['z0'],
                    name='interp_z').interp_mtx(data.coords()[0:2])
    G_data.add(lin_op(grids['dz'], name='interp_dz').interp_mtx(data.coords()))

    # define the smoothness constraints
    constraint_op_list = []
    setup_smoothness_constraints(grids, constraint_op_list, args['E_RMS'],
                                 args['mask_scale'])

    # setup the smooth biases
    if args['grid_bias_model_args'] is not None:
        for bm_args in args['grid_bias_model_args']:
            setup_grid_bias(data, G_data, constraint_op_list, grids, **bm_args)

    #if args['E_RMS_d2x_PS_bias'] is not None:
    #    setup_PS_bias(data, G_data, constraint_op_list, grids, bds, args)

    # if bias params are given, create a set of parameters to estimate them
    if args['bias_params'] is not None:
        data, bias_model = assign_bias_ID(data, args['bias_params'], \
                                          bias_filter=args['bias_filter'])
        setup_bias_fit(data,
                       bias_model,
                       G_data,
                       constraint_op_list,
                       bias_param_name='bias_ID')
    else:
        bias_model = {}
    if args['data_slope_sensors'] is not None and len(
            args['data_slope_sensors']) > 0:
        #N.B.  This does not currently work.
        bias_model['E_slope'] = args['E_slope']
        G_slope_bias, Gc_slope_bias, Cvals_slope_bias, bias_model = data_slope_bias(
            data,
            bias_model,
            sensors=args['data_slope_sensors'],
            col_0=G_data.col_N)
        G_data.add(G_slope_bias)
        constraint_op_list.append(Gc_slope_bias)
    # put the equations together
    Gc = lin_op(None, name='constraints').vstack(constraint_op_list)

    N_eq = G_data.N_eq + Gc.N_eq

    # put together all the errors
    Ec = np.zeros(Gc.N_eq)
    for op in constraint_op_list:
        try:
            Ec[Gc.TOC['rows'][op.name]] = op.expected
        except ValueError:
            # report which constraint operator caused the problem, then re-raise
            print("smooth_xytb_fit:\n\t\tproblem with " + op.name)
            raise
    if args['data_slope_sensors'] is not None and len(
            args['data_slope_sensors']) > 0:
        Ec[Gc.TOC['rows'][Gc_slope_bias.name]] = Cvals_slope_bias
    Ed = data.sigma.ravel()
    if np.any(Ed == 0):
        raise ValueError('zero value found in data sigma')
    if np.any(Ec == 0):
        raise ValueError('zero value found in constraint sigma')
    #print({op.name:[Ec[Gc.TOC['rows'][op.name]].min(),  Ec[Gc.TOC['rows'][op.name]].max()] for op in constraint_op_list})
    # calculate the inverse square root of the data covariance matrix
    TCinv = sp.dia_matrix((1. / np.concatenate((Ed, Ec)), 0),
                          shape=(N_eq, N_eq))

    # define the right hand side of the equation
    rhs = np.zeros([N_eq])
    rhs[0:data.size] = data.z.ravel()

    # put the fit and constraint matrices together
    Gcoo = sp.vstack([G_data.toCSR(), Gc.toCSR()]).tocoo()

    # setup operators that take averages of the grid at different scales
    averaging_ops = setup_averaging_ops(grids['dz'],
                                        G_data.col_N,
                                        args,
                                        cell_area=grids['dz'].cell_area)

    # setup masked averaging ops
    averaging_ops.update(
        setup_avg_mask_ops(grids['dz'], G_data.col_N, args['avg_masks'],
                           args['dzdt_lags']))

    # define the matrix that sets dz[reference_epoch]=0 by removing columns from the solution:
    Ip_c = build_reference_epoch_matrix(G_data, Gc, grids,
                                        args['reference_epoch'])

    # eliminate the columns for the model variables that are set to zero
    Gcoo = Gcoo.dot(Ip_c)
    timing['setup'] = time() - tic

    # initialize the book-keeping matrices for the inversion
    if "three_sigma_edit" in data.fields:
        in_TSE = np.flatnonzero(data.three_sigma_edit)
    else:
        in_TSE = np.arange(G_data.N_eq, dtype=int)
    in_TSE_last = np.zeros([0])
    if args['VERBOSE']:
        print("initial: %d:" % G_data.r.max(), flush=True)

    # if we've done any iterations, parse the model and the data residuals
    if args['max_iterations'] > 0:
        tic_iteration = time()
        m0, sigma_hat, in_TSE, in_TSE_last, rs_data=iterate_fit(data, Gcoo, rhs, \
                                TCinv, G_data, Gc, in_TSE, Ip_c, timing, args, \
                                    bias_model=bias_model)

        timing['iteration'] = time() - tic_iteration
        in_TSE = in_TSE_last
        valid_data[valid_data] = (np.abs(rs_data) <
                                  3.0 * np.maximum(1, sigma_hat))
        data.assign({
            'three_sigma_edit':
            np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat)
        })
        # report the model-based estimate of the data points
        data.assign({'z_est': np.reshape(G_data.toCSR().dot(m0), data.shape)})
        parse_model(m, m0, data, R, RMS, G_data, averaging_ops, Gc, Ec, grids,
                    bias_model, args)
        R['data'] = np.sum((((data.z_est[data.three_sigma_edit == 1] -
                              data.z[data.three_sigma_edit == 1]) /
                             data.sigma[data.three_sigma_edit == 1])**2))
        RMS['data'] = np.sqrt(
            np.mean((data.z_est[data.three_sigma_edit == 1] -
                     data.z[data.three_sigma_edit == 1])**2))

    # Compute the error in the solution if requested
    if args['compute_E']:
        # We have generally not done any iterations at this point, so need to make the Ip_r matrix
        cov_rows = G_data.N_eq + np.arange(Gc.N_eq)
        Ip_r=sp.coo_matrix((np.ones(Gc.N_eq+in_TSE.size), (np.arange(Gc.N_eq+in_TSE.size), np.concatenate((in_TSE, cov_rows)))), \
                           shape=(Gc.N_eq+in_TSE.size, Gcoo.shape[0])).tocsc()
        if args['VERBOSE']:
            print("Starting uncertainty calculation", flush=True)
            tic_error = time()
        # recalculate the error scaling from the misfits
        rs = (data.z_est - data.z) / data.sigma
        error_scale = RDE(rs[data.three_sigma_edit == 1])
        print(f"scaling uncertainties by {error_scale}")
        calc_and_parse_errors(E, Gcoo, TCinv, rhs, Ip_c, Ip_r, grids, G_data, Gc, averaging_ops, \
                         bias_model, args['bias_params'], dzdt_lags=args['dzdt_lags'], timing=timing, \
                             error_scale=error_scale)
        if args['VERBOSE']:
            print("\tUncertainty propagation took %3.2f seconds" %
                  (time() - tic_error),
                  flush=True)

    TOC = Gc.TOC
    return {'m':m, 'E':E, 'data':data, 'grids':grids, 'valid_data': valid_data, \
            'TOC':TOC,'R':R, 'RMS':RMS, 'timing':timing,'E_RMS':args['E_RMS'], \
                'dzdt_lags':args['dzdt_lags']}
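The required keyword arguments for smooth_xytb_fit are 'data', 'W', 'ctr', 'spacing', and 'E_RMS'; the W/ctr/spacing dictionaries follow the pattern of the glas_fit example (#16). A hedged sketch of a call, in which the data object and every numerical value are placeholders:

# 'data' must be a point-data object with x, y, time, z, and sigma fields
W = {'x': 4.e4, 'y': 4.e4, 't': 6}                    # domain widths (m, m, yr)
ctr = {'x': -150000., 'y': -2000000., 't': 2006.}     # domain center
spacing = {'z0': 5.e2, 'dz': 4.e3, 'dt': 0.25}        # grid resolutions
E_RMS = {'d2z0_dx2': 2.e-3, 'd3z_dx2dt': 1.e-6,
         'd2z_dxdt': 3.e-2, 'd2z_dt2': 1}             # constraint weights

# result = smooth_xytb_fit(data=data, W=W, ctr=ctr, spacing=spacing, E_RMS=E_RMS,
#                          reference_epoch=12, dzdt_lags=[1, 4], compute_E=False)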
Example #14
def setup_averaging_ops(grid, col_N, args, cell_area=None):
    # build operators that take the average of the delta-z grid at large scales.
    # these get used both in the averaging and error-calculation codes

    ops = {}
    if args['dzdt_lags'] is not None:
        # build the not-averaged dz/dt operators (these are not masked)
        for lag in args['dzdt_lags']:
            this_name = 'dzdt_lag' + str(lag)
            op = lin_op(grid, name=this_name, col_N=col_N).dzdt(lag=lag)
            op.dst_grid.cell_area = grid.cell_area
            ops[this_name] = op

    # make the averaged ops
    if args['avg_scales'] is None:
        return ops
    if args['dzdt_lags'] is None:
        return ops

    N_grid = [ctrs.size for ctrs in grid.ctrs]
    for scale in args['avg_scales']:
        this_name = 'avg_dz_' + str(int(scale)) + 'm'
        kernel_N = np.floor(
            np.array([scale / dd for dd in grid.delta[0:2]] + [1])).astype(int)

        # subscripts for the centers of the averaged areas
        # assume that the largest averaging offset takes the mean of the center
        # of the grid.  Otherwise, center the cells on odd multiples of the grid
        # spacing
        if scale == np.max(args['avg_scales']):
            offset = 0
        else:
            offset = 0.5

        grid_ctr_subs = [
            sym_range(N_grid[0], kernel_N[0], offset=offset),
            sym_range(N_grid[1], kernel_N[1], offset=offset),
            np.arange(grid.shape[2], dtype=int)
        ]

        sub0s = np.meshgrid(*grid_ctr_subs, indexing='ij')

        # make the operator
        op=lin_op(grid, name=this_name, col_N=col_N)\
            .sum_to_grid3(kernel_N+1, sub0s=sub0s, taper=True)

        op.apply_2d_mask(mask=cell_area)
        op.dst_grid.cell_area = sum_cell_area(grid,
                                              op.dst_grid,
                                              sub0s=sub0s,
                                              cell_area_f=cell_area)

        if cell_area is not None:
            # if cell area was specified, normalize each row by the input area
            op.normalize_by_unit_product()
        else:
            # divide the values by the kernel area in cells
            op.v /= (kernel_N[0] * kernel_N[1])
        op.dst_grid.cell_area = sum_cell_area(grid,
                                              op.dst_grid,
                                              sub0s=sub0s,
                                              cell_area_f=cell_area)
        ops[this_name] = op

        for lag in args['dzdt_lags']:
            dz_name = 'avg_dzdt_' + str(int(scale)) + 'm' + '_lag' + str(lag)
            op=lin_op(grid, name=this_name, col_N=col_N)\
                .sum_to_grid3(kernel_N+1, sub0s=sub0s, lag=lag, taper=True)\
                    .apply_2d_mask(mask=cell_area)
            op.dst_grid.cell_area = sum_cell_area(grid,
                                                  op.dst_grid,
                                                  sub0s=sub0s,
                                                  cell_area_f=cell_area)
            if cell_area is not None:
                # the appropriate weight is expected number of nonzero elements
                # for each nonzero node, times the weight for each time step
                op.normalize_by_unit_product(wt=2 / (lag * grid.delta[2]))
            else:
                op.v /= (kernel_N[0] * kernel_N[1])
            ops[dz_name] = op

    return ops
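setup_averaging_ops sizes its kernel as scale/delta cells in x and y (one time slice at a time) and then sums and normalizes over that kernel. The kernel sizing and the unmasked normalization reduce to a block mean, sketched here with hypothetical spacings:

import numpy as np

delta = np.array([500., 500., 0.25])       # hypothetical grid spacing (m, m, yr)
scale = 2000.                              # averaging scale (m)

kernel_N = np.floor(np.array([scale / dd for dd in delta[0:2]] + [1])).astype(int)
print(kernel_N)                            # [4 4 1]: average 4x4 cells, one epoch at a time

dz = np.arange(8 * 8, dtype=float).reshape(8, 8)    # one epoch of a hypothetical dz grid
avg = dz.reshape(2, 4, 2, 4).mean(axis=(1, 3))      # unmasked equivalent of the averaging op
print(avg)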
Example #15
def smooth_xyt_fit(**kwargs):
    required_fields=('data','W','ctr','spacing','E_RMS')
    args={'reference_epoch':0,
    'W_ctr':1e4,
    'mask_file':None,
    'mask_scale':None,
    'compute_E':False,
    'max_iterations':10,
    'srs_proj4': None,
    'N_subset': None,
    'bias_params': None,
    'repeat_res':None,
    'converge_tol_dz':0.05,
    'repeat_dt': 1,
    'Edit_only': False,
    'dzdt_lags':[1, 4],
    'data_slope_sensors':None,
    'E_slope':0.05,
    'VERBOSE': True}
    args.update(kwargs)
    for field in required_fields:
        if field not in kwargs:
            raise ValueError("%s must be defined" % field)
    valid_data = np.isfinite(args['data'].z) & np.isfinite(args['data'].sigma)
    timing=dict()

    if args['N_subset'] is not None:
        tic=time()
        valid_data &= edit_data_by_subset_fit(args['N_subset'], args)
        timing['edit_by_subset']=time()-tic
        if args['Edit_only']:
            return {'timing':timing, 'data':args['data'].copy()[valid_data]}
    m={}
    E={}
    R={}
    RMS={}

    # define the grids
    tic=time()
    bds={coord:args['ctr'][coord]+np.array([-0.5, 0.5])*args['W'][coord] for coord in ('x','y','t')}
    grids=dict()
    grids['z0']=fd_grid( [bds['y'], bds['x']], args['spacing']['z0']*np.ones(2),\
         name='z0', srs_proj4=args['srs_proj4'], mask_file=args['mask_file'])
    grids['dz']=fd_grid( [bds['y'], bds['x'], bds['t']], \
        [args['spacing']['dz'], args['spacing']['dz'], args['spacing']['dt']], \
         name='dz', col_0=grids['z0'].N_nodes, srs_proj4=args['srs_proj4'], \
        mask_file=args['mask_file'])
    grids['z0'].col_N=grids['dz'].col_N
    grids['t']=fd_grid([bds['t']], [args['spacing']['dt']], name='t')

    # select only the data points that are within the grid bounds
    valid_z0=grids['z0'].validate_pts((args['data'].coords()[0:2]))
    valid_dz=grids['dz'].validate_pts((args['data'].coords()))
    valid_data=valid_data & valid_dz & valid_z0
    
    if not np.any(valid_data):
        return {'m':m, 'E':E, 'data':None, 'grids':grids, 'valid_data': valid_data, 'TOC':{},'R':{}, 'RMS':{}, 'timing':timing,'E_RMS':args['E_RMS']}

    # if repeat_res is given, resample the data to include only repeat data (to within a spatial tolerance of repeat_res)
    if args['repeat_res'] is not None:
        N_before_repeat=np.sum(valid_data)   
        valid_data[valid_data]=valid_data[valid_data] & \
            select_repeat_data(args['data'].copy_subset(valid_data), grids, args['repeat_dt'], args['repeat_res'], reference_time=grids['t'].ctrs[0][args['reference_epoch']])
        if args['VERBOSE']:
            print("before repeat editing found %d data" % N_before_repeat)
            print("after repeat editing found %d data" % valid_data.sum())

    # subset the data based on the valid mask
    data=args['data'].copy_subset(valid_data)

    # if we have a mask file, use it to subset the data
    # needs to be done after the valid subset because otherwise the interp_mtx for the mask file fails.
    if args['mask_file'] is not None:
        temp=fd_grid( [bds['y'], bds['x']], [args['spacing']['z0'], args['spacing']['z0']], name='z0', srs_proj4=args['srs_proj4'], mask_file=args['mask_file'])
        data_mask=lin_op(temp, name='interp_z').interp_mtx(data.coords()[0:2]).toCSR().dot(grids['z0'].mask.ravel())
        data_mask[~np.isfinite(data_mask)]=0
        if np.any(data_mask==0):
            data.index(~(data_mask==0))
            valid_data[valid_data]= ~(data_mask==0)

    # Check if we have any data.  If not, quit
    if data.size==0:
        return {'m':m, 'E':E, 'data':data, 'grids':grids, 'valid_data': valid_data, 'TOC':{},'R':{}, 'RMS':{}, 'timing':timing,'E_RMS':args['E_RMS']}

    # define the interpolation operator, equal to the sum of the dz and z0 operators
    G_data=lin_op(grids['z0'], name='interp_z').interp_mtx(data.coords()[0:2])
    G_data.add(lin_op(grids['dz'], name='interp_dz').interp_mtx(data.coords()))

     # define the smoothness constraints
    grad2_z0=lin_op(grids['z0'], name='grad2_z0').grad2(DOF='z0')
    grad2_dz=lin_op(grids['dz'], name='grad2_dzdt').grad2_dzdt(DOF='z', t_lag=1)
    grad_dzdt=lin_op(grids['dz'], name='grad_dzdt').grad_dzdt(DOF='z', t_lag=1)
    constraint_op_list=[grad2_z0, grad2_dz, grad_dzdt]
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        d2z_dt2=lin_op(grids['dz'], name='d2z_dt2').d2z_dt2(DOF='z')
        constraint_op_list.append(d2z_dt2)

    # if bias params are given, create a set of parameters to estimate them
    if args['bias_params'] is not None:
        data, bias_model=assign_bias_ID(data, args['bias_params'])
        G_bias, Gc_bias, Cvals_bias, bias_model=\
            param_bias_matrix(data, bias_model, bias_param_name='bias_ID', 
                              col_0=grids['dz'].col_N)
        G_data.add(G_bias)
        constraint_op_list.append(Gc_bias)

    if args['data_slope_sensors'] is not None:
        bias_model['E_slope']=args['E_slope']
        G_slope_bias, Gc_slope_bias, Cvals_slope_bias, bias_model= data_slope_bias(data,  bias_model, sensors=args['data_slope_sensors'],  col_0=G_data.col_N)
        G_data.add(G_slope_bias)
        constraint_op_list.append(Gc_slope_bias)
    # put the equations together
    Gc=lin_op(None, name='constraints').vstack(constraint_op_list)
    N_eq=G_data.N_eq+Gc.N_eq

    # put together all the errors
    Ec=np.zeros(Gc.N_eq)
    root_delta_V_dz=np.sqrt(np.prod(grids['dz'].delta))
    root_delta_A_z0=np.sqrt(np.prod(grids['z0'].delta))
    Ec[Gc.TOC['rows']['grad2_z0']]=args['E_RMS']['d2z0_dx2']/root_delta_A_z0*grad2_z0.mask_for_ind0(args['mask_scale'])
    Ec[Gc.TOC['rows']['grad2_dzdt']]=args['E_RMS']['d3z_dx2dt']/root_delta_V_dz*grad2_dz.mask_for_ind0(args['mask_scale'])
    Ec[Gc.TOC['rows']['grad_dzdt']]=args['E_RMS']['d2z_dxdt']/root_delta_V_dz*grad_dzdt.mask_for_ind0(args['mask_scale'])
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        Ec[Gc.TOC['rows']['d2z_dt2']]=args['E_RMS']['d2z_dt2']/root_delta_V_dz
    if args['bias_params'] is not None:
        Ec[Gc.TOC['rows'][Gc_bias.name]] = Cvals_bias
    if args['data_slope_sensors'] is not None:
        Ec[Gc.TOC['rows'][Gc_slope_bias.name]] = Cvals_slope_bias
    Ed=data.sigma.ravel()
    # calculate the inverse square root of the data covariance matrix
    TCinv=sp.dia_matrix((1./np.concatenate((Ed, Ec)), 0), shape=(N_eq, N_eq))

    # define the right hand side of the equation
    rhs=np.zeros([N_eq])
    rhs[0:data.size]=data.z.ravel()

    # put the fit and constraint matrices together
    Gcoo=sp.vstack([G_data.toCSR(), Gc.toCSR()]).tocoo()
    cov_rows=G_data.N_eq+np.arange(Gc.N_eq)
     
    # build a matrix that takes the average of the center of the delta-z grid
    # this gets used both in the averaging and error-calculation codes
    XR=np.mean(grids['z0'].bds[0])+np.array([-1., 1.])*args['W_ctr']/2.
    YR=np.mean(grids['z0'].bds[1])+np.array([-1., 1.])*args['W_ctr']/2.
    center_dzbar=lin_op(grids['dz'], name='center_dzbar', col_N=G_data.col_N).vstack([lin_op(grids['dz']).mean_of_bounds((XR, YR, [season, season] )) for season in grids['dz'].ctrs[2]])
    G_dzbar=center_dzbar.toCSR()

    # define the matrix that sets dz[reference_epoch]=0 by removing columns from the solution:
    # Find the rows and columns that match the reference epoch
    temp_r, temp_c=np.meshgrid(np.arange(0, grids['dz'].shape[0]), np.arange(0, grids['dz'].shape[1]))
    z02_mask=grids['dz'].global_ind([temp_r.transpose().ravel(), temp_c.transpose().ravel(),\
                  args['reference_epoch']+np.zeros_like(temp_r).ravel()])

    # Identify all of the DOFs that do not include the reference epoch
    cols=np.arange(G_data.col_N, dtype='int')
    include_cols=np.setdiff1d(cols, z02_mask)
    # Generate a matrix that has diagonal elements corresponding to all DOFs except the reference epoch.
    # Multiplying this by a matrix with columns for all model parameters yields a matrix with no columns
    # corresponding to the reference epoch.
    Ip_c=sp.coo_matrix((np.ones_like(include_cols), (include_cols, np.arange(include_cols.size))), \
                       shape=(Gc.col_N, include_cols.size)).tocsc()

    # eliminate the columns for the model variables that are set to zero
    Gcoo=Gcoo.dot(Ip_c)
    timing['setup']=time()-tic

    # initialize the book-keeping matrices for the inversion
    m0=np.zeros(Ip_c.shape[0])
    if "three_sigma_edit" in data.fields:
        inTSE=np.flatnonzero(data.three_sigma_edit)
    else:
        inTSE=np.arange(G_data.N_eq, dtype=int)
    inTSE_last = np.zeros([0])
    if args['VERBOSE']:
        print("initial: %d:" % G_data.r.max())
    tic_iteration=time()
    for iteration in range(args['max_iterations']):
        # build the parsing matrix that removes invalid rows
        Ip_r=sp.coo_matrix((np.ones(Gc.N_eq+inTSE.size), (np.arange(Gc.N_eq+inTSE.size), np.concatenate((inTSE, cov_rows)))), \
                           shape=(Gc.N_eq+inTSE.size, Gcoo.shape[0])).tocsc()

        m0_last=m0
        if args['VERBOSE']:
            print("starting qr solve for iteration %d" % iteration)
        # solve the equations
        tic=time(); 
        m0=Ip_c.dot(sparseqr.solve(Ip_r.dot(TCinv.dot(Gcoo)), Ip_r.dot(TCinv.dot(rhs)))); 
        timing['sparseqr_solve']=time()-tic

        # calculate the full data residual
        rs_data=(data.z-G_data.toCSR().dot(m0))/data.sigma
        # calculate the robust standard deviation of the scaled residuals for the selected data
        sigma_hat=RDE(rs_data[inTSE])
        
        # select the data that have scaled residuals < 3 *max(1, sigma_hat)
        inTSE_last=inTSE
        inTSE = np.flatnonzero(np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat))
        
        # quit if the solution is too similar to the previous solution
        if (np.max(np.abs((m0_last-m0)[Gc.TOC['cols']['dz']])) < args['converge_tol_dz']) and (iteration > 2):
            if args['VERBOSE']:
                print("Solution identical to previous iteration with tolerance %3.1f, exiting after iteration %d" % (args['converge_tol_dz'], iteration))
            break
        # report how many data passed the three-sigma edit and the residual scatter
        if args['VERBOSE']:
            print('found %d in TSE, sigma_hat=%3.3f' % ( inTSE.size, sigma_hat ))
        if iteration > 0:
            if inTSE.size == inTSE_last.size and np.all( inTSE_last == inTSE ):
                if args['VERBOSE']:
                    print("filtering unchanged, exiting after iteration %d" % iteration)
                break 
        if iteration >= 2:
            if sigma_hat <= 1:
                if args['VERBOSE']:
                    print("sigma_hat LT 1, exiting after iteration %d" % iteration)
                break             

    # if we've done any iterations, parse the model and the data residuals
    if args['max_iterations'] > 0:
        timing['iteration']=time()-tic_iteration
        inTSE=inTSE_last
        valid_data[valid_data]=(np.abs(rs_data)<3.0*np.maximum(1, sigma_hat))
        data.assign({'three_sigma_edit':np.abs(rs_data)<3.0*np.maximum(1, sigma_hat)})
        # report the model-based estimate of the data points
        data.assign({'z_est':np.reshape(G_data.toCSR().dot(m0), data.shape)})
        parse_model(m, m0, G_data, G_dzbar, Gc.TOC, grids, args['bias_params'], bias_model, dzdt_lags=args['dzdt_lags'])
        # parse the residuals to assess the contributions to the total error:
        # build the scaling matrix (one over the expected constraint error) for the constraint equations
        TCinv_cov=sp.dia_matrix((1./Ec, 0), shape=(Gc.N_eq, Gc.N_eq))
        rc=TCinv_cov.dot(Gc.toCSR().dot(m0))
        ru=Gc.toCSR().dot(m0)
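        # rc holds the constraint residuals scaled by their expected errors; ru holds the raw constraint values.
        # R sums the squared scaled residuals and RMS gives the root-mean-square raw value for each constraint type.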
        for eq_type in ['d2z_dt2','grad2_z0','grad2_dzdt']:
            if eq_type in Gc.TOC['rows']:
                R[eq_type]=np.sum(rc[Gc.TOC['rows'][eq_type]]**2)
                RMS[eq_type]=np.sqrt(np.mean(ru[Gc.TOC['rows'][eq_type]]**2))
    R['data']=np.sum((((data.z_est[data.three_sigma_edit==1]-data.z[data.three_sigma_edit==1])/data.sigma[data.three_sigma_edit==1])**2))
    RMS['data']=np.sqrt(np.mean((data.z_est[data.three_sigma_edit==1]-data.z[data.three_sigma_edit==1])**2))

    # Compute the error in the solution if requested
    if args['compute_E']:
        # If no iterations have been run, the Ip_r matrix does not exist yet, so build it here from the current data selection
        Ip_r=sp.coo_matrix((np.ones(Gc.N_eq+inTSE.size), (np.arange(Gc.N_eq+inTSE.size), np.concatenate((inTSE, cov_rows)))), \
                           shape=(Gc.N_eq+inTSE.size, Gcoo.shape[0])).tocsc()
        parse_errors(E, Gcoo, TCinv, rhs, Ip_c, Ip_r, grids, G_data, Gc, G_dzbar, \
                         bias_model, args['bias_params'], dzdt_lags=args['dzdt_lags'], timing=timing)

 

    TOC=Gc.TOC
    return {'m':m, 'E':E, 'data':data, 'grids':grids, 'valid_data': valid_data, 'TOC':TOC,'R':R, 'RMS':RMS, 'timing':timing,'E_RMS':args['E_RMS'], 'dzdt_lags':args['dzdt_lags']}
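
# A minimal, self-contained sketch (not part of the example above) of the sparse selection-matrix
# pattern used for Ip_c and Ip_r: a 0/1 matrix with one entry per retained degree of freedom
# removes columns when applied on the right, re-expands a reduced solution back to full size,
# and, with the indices swapped, removes edited rows.  All names below are illustrative only.
import numpy as np
import scipy.sparse as sp

G = sp.random(8, 5, density=0.6, format='csr', random_state=0)   # toy design matrix

# suppose column 2 corresponds to the reference epoch and must be removed
keep = np.setdiff1d(np.arange(G.shape[1]), [2])
Ip_c = sp.coo_matrix((np.ones(keep.size), (keep, np.arange(keep.size))),
                     shape=(G.shape[1], keep.size)).tocsc()
G_reduced = G.dot(Ip_c)            # design matrix without the reference-epoch column
m_reduced = np.arange(keep.size, dtype=float)   # stand-in for a reduced-space solution
m_full = Ip_c.dot(m_reduced)       # re-expanded solution; the removed DOF is exactly zero
print(m_full)                      # -> [0. 1. 0. 2. 3.]

# the same pattern selects rows (the Ip_r matrices built in the editing loops):
rows = np.array([0, 2, 3, 5])      # rows that survive the three-sigma edit
Ip_r = sp.coo_matrix((np.ones(rows.size), (np.arange(rows.size), rows)),
                     shape=(rows.size, G.shape[0])).tocsc()
G_edited = Ip_r.dot(G)             # design matrix with the flagged rows removed
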
Ejemplo n.º 16
0
def glas_fit(W0, xy0=np.array((-150000, -2000000)), D=None, E_RMS=None, gI=None, giFile='/Data/glas/GL/rel_634/GeoIndex.h5'):
    if gI is None:
        gI=geo_index().from_file(giFile)

    timing=dict()
    if E_RMS is None:
        E_RMS={'d2z0_dx2':20000./3000/3000, 'd3z_dx2dt':10./3000/3000, 'd2z_dxdt':100/3000, 'd2z_dt2':1}

    W={'x':W0, 'y':W0, 't':6}
    spacing={'z0':5.e2, 'dzdt':5.e3}
    ctr={'x':xy0[0], 'y':xy0[1], 't':(2003+2009)/2.}

    args={'W':W, 'ctr':ctr, 'spacing':spacing, 'E_RMS':E_RMS, 'max_iterations':25}


    if D is None:
        fields=[ 'IceSVar', 'deltaEllip', 'numPk', 'ocElv', 'reflctUC', 'satElevCorr',  'time',  'x', 'y', 'z']
        D=gI.query_xy_box(xy0[0]+np.array([-W['x']/2, W['x']/2]), xy0[1]+np.array([-W['y']/2, W['y']/2]), fields=fields)

        D.assign({'year': matlabToYear(D.time)})
        good=(D.IceSVar < 0.035) & (D.reflctUC >0.05) & (D.satElevCorr < 1) & (D.numPk==1)
        D.subset(good, datasets=['x','y','z','year'])

        D.assign({'sigma':np.zeros_like(D.x)+0.2, 'time':D.year})
        plt.plot(D.x, D.y,'m.')

    bds={coord:args['ctr'][coord]+np.array([-0.5, 0.5])*args['W'][coord] for coord in ('x','y')}
    grids=dict()
    grids['z0']=fd_grid( [bds['y'], bds['x']], args['spacing']['z0']*np.ones(2), name='z0')
    grids['dzdt']=fd_grid( [bds['y'], bds['x']],  args['spacing']['dzdt']*np.ones(2), \
         col_0=grids['z0'].col_N+1, name='dzdt')

    valid_z0=grids['z0'].validate_pts((D.coords()[0:2]))
    valid_dz=grids['dzdt'].validate_pts((D.coords()))
    valid_data=valid_dz & valid_z0
    D=D.subset(valid_data)

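    # the data operator models each observation as z0 interpolated to the point location,
    # plus dzdt (also interpolated) scaled by the time offset from the center epoch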
    G_data=lin_op(grids['z0'], name='interp_z').interp_mtx(D.coords()[0:2])
    G_dzdt=lin_op(grids['dzdt'], name='dzdt').interp_mtx(D.coords()[0:2])
    G_dzdt.v *= (D.year[G_dzdt.r.astype(int)]-ctr['t'])
    G_data.add(G_dzdt)

    grad2_z0=lin_op(grids['z0'], name='grad2_z0').grad2(DOF='z0')
    grad_z0=lin_op(grids['z0'], name='grad_z0').grad(DOF='z0')
    grad2_dzdt=lin_op(grids['dzdt'], name='grad2_dzdt').grad2(DOF='dzdt')
    grad_dzdt=lin_op(grids['dzdt'], name='grad_dzdt').grad(DOF='dzdt')
    Gc=lin_op(None, name='constraints').vstack((grad2_z0, grad_z0, grad2_dzdt, grad_dzdt))
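    # expected magnitude for each constraint row: the E_RMS values divided by the square root of
    # the z0 grid-cell area, with the first-gradient constraints made 1e4 times weaker (larger
    # expected value) than the corresponding curvature constraints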
    Ec=np.zeros(Gc.N_eq)
    root_delta_A_z0=np.sqrt(np.prod(grids['z0'].delta))
    Ec[Gc.TOC['rows']['grad2_z0']]=args['E_RMS']['d2z0_dx2']/root_delta_A_z0
    Ec[Gc.TOC['rows']['grad2_dzdt']]=args['E_RMS']['d3z_dx2dt']/root_delta_A_z0
    Ec[Gc.TOC['rows']['grad_z0']]=1.e4*args['E_RMS']['d2z0_dx2']/root_delta_A_z0
    Ec[Gc.TOC['rows']['grad_dzdt']]=1.e4*args['E_RMS']['d3z_dx2dt']/root_delta_A_z0

    Ed=D.sigma.ravel()

    N_eq=G_data.N_eq+Gc.N_eq

    # calculate the inverse square root of the data covariance matrix
    TCinv=sp.dia_matrix((1./np.concatenate((Ed, Ec)), 0), shape=(N_eq, N_eq))

    # define the right hand side of the equation
    rhs=np.zeros([N_eq])
    rhs[0:D.x.size]=D.z.ravel()
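    # the first G_data.N_eq rows of the right-hand side hold the observed elevations; the constraint rows remain zero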

    # put the fit and constraint matrices together
    Gcoo=sp.vstack([G_data.toCSR(), Gc.toCSR()]).tocoo()
    cov_rows=G_data.N_eq+np.arange(Gc.N_eq)

    # initialize the book-keeping matrices for the inversion
    m0=np.zeros(Gcoo.shape[1])
    inTSE=np.arange(G_data.N_eq, dtype=int)

    for iteration in range(args['max_iterations']):
        # build the parsing matrix that removes invalid rows
        Ip_r=sp.coo_matrix((np.ones(Gc.N_eq+inTSE.size), (np.arange(Gc.N_eq+inTSE.size), np.concatenate((inTSE, cov_rows)))), shape=(Gc.N_eq+inTSE.size, Gcoo.shape[0])).tocsc()

        m0_last=m0
        # solve the equations
        tic=time()
        m0=sparseqr.solve(Ip_r.dot(TCinv.dot(Gcoo)), Ip_r.dot(TCinv.dot(rhs)))
        timing['sparseqr_solve']=time()-tic

        # quit if the solution is too similar to the previous solution
        if np.max(np.abs((m0_last-m0)[Gc.TOC['cols']['dzdt']])) < 0.05:
            break

        # calculate the full data residual
        rs_data=(D.z-G_data.toCSR().dot(m0))/D.sigma
        # calculate the robust standard deviation of the scaled residuals for the selected data
        sigma_hat=RDE(rs_data[inTSE])
        inTSE_last=inTSE
        # select the data that are within 3*sigma of the solution
        inTSE=np.where(np.abs(rs_data)<3.0*sigma_hat)[0]
        print('found %d in TSE, sigma_hat=%3.3f' % (inTSE.size, sigma_hat))
        if sigma_hat <= 1 or( inTSE.size == inTSE_last.size and np.all( inTSE_last == inTSE )):
            break
    m=dict()
    m['z0']=m0[Gc.TOC['cols']['z0']].reshape(grids['z0'].shape)
    m['dzdt']=m0[Gc.TOC['cols']['dzdt']].reshape(grids['dzdt'].shape)
    DOPLOT=False  # set to True to display the recovered z0 and dzdt fields
    if DOPLOT:
        plt.subplot(121)
        plt.imshow(m['z0'])
        plt.colorbar()
        plt.subplot(122)
        plt.imshow(m['dzdt'])
        plt.colorbar()

    if False:
        plt.figure()
        Dfinal=D.subset(inTSE)
        ii=np.argsort(Dfinal.z)
        plt.scatter(Dfinal.x[ii], Dfinal.y[ii], c=Dfinal.z[ii]); plt.colorbar()


    return grids, m, D, inTSE, sigma_hat
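
# A minimal, self-contained sketch (not part of glas_fit above) of the iterative three-sigma
# editing used in both examples, applied to a toy 1-D weighted least-squares problem.  The
# robust dispersion estimate below (half the 16th-to-84th percentile spread of the residuals)
# is an assumption standing in for the RDE() helper, whose implementation is not shown here.
import numpy as np

rng = np.random.default_rng(0)
x = np.linspace(0., 1., 200)
z = 2.0 + 3.0*x + rng.normal(scale=0.1, size=x.size)
z[::25] += 5.0                          # inject a few gross outliers
sigma = np.full(x.size, 0.1)            # per-point expected errors

G = np.column_stack([np.ones_like(x), x])
keep = np.arange(x.size)                # analogous to inTSE: data currently in the fit

for iteration in range(10):
    # weighted least squares on the currently selected data
    w = 1./sigma[keep]
    m = np.linalg.lstsq(G[keep]*w[:, None], z[keep]*w, rcond=None)[0]
    # scaled residuals for all data, as with rs_data above
    rs = (z - G.dot(m))/sigma
    # robust dispersion of the residuals for the selected data
    sigma_hat = (np.percentile(rs[keep], 84) - np.percentile(rs[keep], 16))/2.
    keep_last = keep
    keep = np.flatnonzero(np.abs(rs) < 3.0*np.maximum(1., sigma_hat))
    if keep.size == keep_last.size and np.all(keep == keep_last):
        break                           # editing unchanged: converged

print(m, keep.size, sigma_hat)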