Example #1
def setup_grids(args):
    '''
    Set up the grids for the problem.

    Inputs:
    args: (dict) dictionary containing input arguments. Required entries:
        W: dictionary with entries 'x','y','t' specifying the domain width in x, y, and time
        ctr: dictionary with entries 'x','y','t' specifying the domain center in x, y, and time
        spacing: dictionary with entries 'z0','dz' and 'dt' specifying the spacing of the z0 grid, the spacing of the dz grid, and the duration of the epochs
        srs_proj4: a proj4 string specifying the data projection       
        mask_file: the mask file which has 1 for points in the domain (data will be used and strong constraints applied)
        mask_data: pointCollection.data object containing the mask.  If this is specified, mask_file is ignored
    Outputs:
        grids: (dict) a dictionary with entries 'z0', 'dz' and 't', each containing a fd_grid object
        bds: (dict) a dictionary specifying the domain bounds in x, y, and t (2-element vector for each)

    Each grid has an assigned location to which its points are mapped in the solution vector.
    In this vector, the grids appear from left to right as z0, then dz.
    '''
    bds = {
        coord: args['ctr'][coord] + np.array([-0.5, 0.5]) * args['W'][coord]
        for coord in ('x', 'y', 't')
    }
    grids = dict()
    if args['mask_data'] is not None:
        mask_file = None
    else:
        mask_file = args['mask_file']

    grids['z0']=fd_grid( [bds['y'], bds['x']], args['spacing']['z0']*np.ones(2),\
         name='z0', srs_proj4=args['srs_proj4'], mask_file=mask_file,\
         mask_data=args['mask_data'])

    grids['dz']=fd_grid( [bds['y'], bds['x'], bds['t']], \
        [args['spacing']['dz'], args['spacing']['dz'], args['spacing']['dt']], \
         name='dz', col_0=grids['z0'].N_nodes, srs_proj4=args['srs_proj4'], \
         mask_file=mask_file, mask_data=args['mask_data'])
    grids['z0'].col_N = grids['dz'].col_N
    grids['t'] = fd_grid([bds['t']], [args['spacing']['dt']], name='t')

    grids['z0'].cell_area = calc_cell_area(grids['z0'])
    if np.any(grids['dz'].delta[0:2] > grids['z0'].delta):
        grids['dz'].cell_area = sum_cell_area(grids['z0'], grids['dz'])
    else:
        grids['dz'].cell_area = calc_cell_area(grids['dz'])

    return grids, bds
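
A minimal usage sketch (not part of the original source): the domain width, center, spacings, and projection below are hypothetical placeholders chosen only to show how the args dictionary is assembled (the proj4 string is borrowed from Example #7).

args = {
    'W': {'x': 6.e4, 'y': 6.e4, 't': 6},
    'ctr': {'x': -1.5e5, 'y': -2.e6, 't': 2006.0},
    'spacing': {'z0': 5.e2, 'dz': 5.e3, 'dt': 0.25},
    'srs_proj4': '+proj=stere +lat_0=90 +lat_ts=70 +lon_0=-45 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs',
    'mask_file': None,    # optional raster mask; ignored when mask_data is given
    'mask_data': None,
}
grids, bds = setup_grids(args)
# grids['z0'] and grids['dz'] share one column numbering in the solution vector
# (z0 columns first, dz columns after); grids['t'] holds the epoch centers.
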
Example #2
def select_repeat_data(data, grids, repeat_dt, resolution):
    """
        Select data that are repeats
        
        input arguments:
            data: input data
            grids: grids
            repeat_dt: time interval by which repeats must be separated to count
            resolution: spatial resolution of repeat calculation
    """
    repeat_grid = fd_grid(grids['z0'].bds,
                          resolution * np.ones(2),
                          name='repeat')
    t_coarse = np.round(
        (data.time - grids['dz'].bds[2][0]) / repeat_dt) * repeat_dt
    grid_repeat_count = np.zeros(np.prod(repeat_grid.shape))
    for t_val in np.unique(t_coarse):
        # select the data points for each epoch
        ii = t_coarse == t_val
        # use the lin_op.interp_mtx to find the grid points associated with each node
        grid_repeat_count += np.asarray(
            lin_op(repeat_grid).interp_mtx((
                data.y[ii], data.x[ii])).toCSR().sum(axis=0) > 0.5).ravel()
    data_repeats = lin_op(repeat_grid).interp_mtx(
        (data.y, data.x)).toCSR().dot(
            (grid_repeat_count > 1).astype(np.float64))
    return data_repeats > 0.5
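
A minimal usage sketch (hedged): 'data' and 'grids' are assumed to come from the surrounding fitting code, and the repeat_dt and resolution values are placeholders.

repeats = select_repeat_data(data, grids, repeat_dt=0.25, resolution=500.)
data_repeats = data.copy().subset(repeats)    # keep only the repeat data
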
Example #3
def setup_PS_bias(data, G_data, constraint_op_list, grids, bds, args):
    '''
    set up a matrix to fit a smooth POCA-vs-Swath bias
    '''
    grids['PS_bias']=fd_grid( [bds['y'], bds['x']], \
       [args['spacing']['dz'], args['spacing']['dz']],\
       name='PS_bias', srs_proj4=args['srs_proj4'],\
       mask_file=args['mask_file'], mask_data=args['mask_data'], \
       col_0=grids['dz'].col_N)
    ps_mtx=lin_op(grid=grids['PS_bias'], name='PS_bias').\
        interp_mtx(data.coords()[0:2])
    # POCA rows should have zero entries
    temp = ps_mtx.v.ravel()
    temp[np.in1d(ps_mtx.r.ravel(), np.flatnonzero(data.swath == 0))] = 0
    ps_mtx.v = temp.reshape(ps_mtx.v.shape)
    G_data.add(ps_mtx)
    #Build a constraint matrix for the curvature of the PS bias
    grad2_ps = lin_op(grids['PS_bias'], name='grad2_PS').grad2(DOF='PS_bias')
    # expected curvature per equation, scaled by the cell dimensions
    # (parentheses ensure the scaling applies to every equation)
    grad2_ps.expected=(args['E_RMS_d2x_PS_bias']+np.zeros(grad2_ps.N_eq))/\
        np.sqrt(np.prod(grids['dz'].delta[0:2]))
    #Build a constraint matrix for the magnitude of the PS bias
    mag_ps=lin_op(grids['PS_bias'], name='mag_ps').data_bias(\
                ind=np.arange(grids['PS_bias'].N_nodes),
                col=np.arange(grids['PS_bias'].col_0, grids['PS_bias'].col_N))
    mag_ps.expected = args['E_RMS_PS_bias'] + np.zeros(mag_ps.N_eq)
    constraint_op_list.append(grad2_ps)
    #constraint_op_list.append(grad_ps)
    constraint_op_list.append(mag_ps)
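
For context, a hedged sketch of how the operators appended here are typically consumed downstream, following the stacking pattern of Examples #6 and #8; the loop over 'expected' attributes is illustrative rather than taken from the source.

Gc = lin_op(None, name='constraints').vstack(constraint_op_list)
Ec = np.zeros(Gc.N_eq)
for op in constraint_op_list:
    if hasattr(op, 'expected'):
        # each operator's expected RMS values become the errors for its rows
        Ec[Gc.TOC['rows'][op.name]] = op.expected
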
Example #4
def sum_cell_area(grid_f,
                  grid_c,
                  cell_area_f=None,
                  return_op=False,
                  sub0s=None,
                  taper=True):
    # sum the masked cell areas of a fine grid into the cells of a coarser grid
    if cell_area_f is None:
        cell_area_f = calc_cell_area(grid_f) * grid_f.mask
    n_k = (grid_c.delta[0:2] / grid_f.delta[0:2] + 1).astype(int)
    temp_grid = fd_grid((grid_f.bds[0:2]), deltas=grid_f.delta[0:2])
    fine_to_coarse = lin_op(grid=temp_grid).sum_to_grid3(
        n_k, sub0s=sub0s, taper=taper, valid_equations_only=False, dims=[0, 1])
    result = fine_to_coarse.toCSR().dot(cell_area_f.ravel()).reshape(
        grid_c.shape[0:2])
    if return_op:
        return result, fine_to_coarse
    return result
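
A hedged sanity-check sketch (grid names as in Example #7): away from mask edges and taper effects, summing the masked fine-grid areas onto the coarse grid should roughly preserve the total area.

area_f = calc_cell_area(grid_z0) * grid_z0.mask    # masked fine-grid cell areas
area_c = sum_cell_area(grid_z0, grid_dz)           # the same areas, accumulated per coarse cell
print(np.nansum(area_f), np.nansum(area_c))        # expected to agree approximately
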
Example #5
def setup_mask(data, grids, valid_data, bds, args):
    '''
    Mark datapoints for which the mask is zero as invalid
    Inputs:
    data: (pc.data) data structure.
    grids: (dict) dictionary of fd_grid objects generated by setup_grids
    valid_data: (numpy boolean array, size(data)) indicates valid data points
    bds: (dict) a dictionary specifying the domain bounds in x, y, and t (2-element vector for each)
    args: (dict) dictionary of input arguments; must include 'spacing', 'srs_proj4', 'mask_file', and 'mask_data'

    '''

    temp = fd_grid([bds['y'], bds['x']],
                   [args['spacing']['z0'], args['spacing']['z0']],
                   name='z0',
                   srs_proj4=args['srs_proj4'],
                   mask_file=args['mask_file'],
                   mask_data=args['mask_data'])
    data_mask = lin_op(temp, name='interp_z').interp_mtx(
        data.coords()[0:2]).toCSR().dot(grids['z0'].mask.ravel())
    data_mask[~np.isfinite(data_mask)] = 0
    if np.any(data_mask == 0):
        data.index(~(data_mask == 0))
        valid_data[valid_data] = ~(data_mask == 0)
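
A minimal usage sketch (hedged): data, grids, valid_data, bds, and args are assumed to exist as in the surrounding examples. setup_mask edits 'data' in place and updates 'valid_data' so that points whose interpolated mask value is zero are dropped.

setup_mask(data, grids, valid_data, bds, args)
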
Example #6
def smooth_xyt_fit(**kwargs):
    required_fields = ('data', 'W', 'ctr', 'spacing', 'E_RMS')
    args = {
        'reference_epoch': 0,
        'W_ctr': 1e4,
        'mask_file': None,
        'mask_scale': None,
        'compute_E': False,
        'max_iterations': 10,
        'srs_WKT': None,
        'N_subset': None,
        'bias_params': None,
        'repeat_res': None,
        'repeat_dt': 1,
        'Edit_only': False,
        'dzdt_lags': [1, 4],
        'VERBOSE': True
    }
    args.update(kwargs)
    for field in required_fields:
        if field not in kwargs:
            raise ValueError("%s must be defined" % field)
    valid_data = np.ones_like(args['data'].x, dtype=bool)
    timing = dict()

    if args['N_subset'] is not None:
        tic = time()
        valid_data = edit_data_by_subset_fit(args['N_subset'], args)
        timing['edit_by_subset'] = time() - tic
        if args['Edit_only']:
            return {
                'timing': timing,
                'data': args['data'].copy().subset(valid_data)
            }
    m = dict()
    E = dict()

    # define the grids
    tic = time()
    bds = {
        coord: args['ctr'][coord] + np.array([-0.5, 0.5]) * args['W'][coord]
        for coord in ('x', 'y', 't')
    }
    grids = dict()
    grids['z0'] = fd_grid([bds['y'], bds['x']],
                          args['spacing']['z0'] * np.ones(2),
                          name='z0',
                          srs_WKT=args['srs_WKT'],
                          mask_file=args['mask_file'])
    grids['dz']=fd_grid( [bds['y'], bds['x'], bds['t']], \
        [args['spacing']['dz'], args['spacing']['dz'], args['spacing']['dt']], col_0=grids['z0'].N_nodes, name='dz', srs_WKT=args['srs_WKT'], mask_file=args['mask_file'])
    grids['z0'].col_N = grids['dz'].col_N
    grids['t'] = fd_grid([bds['t']], [args['spacing']['dt']], name='t')

    # select only the data points that are within the grid bounds
    valid_z0 = grids['z0'].validate_pts((args['data'].coords()[0:2]))
    valid_dz = grids['dz'].validate_pts((args['data'].coords()))
    valid_data = valid_data & valid_dz & valid_z0

    # if repeat_res is given, resample the data to include only repeat data (to within a spatial tolerance of repeat_res)
    if args['repeat_res'] is not None:
        valid_data[valid_data]=valid_data[valid_data] & \
            select_repeat_data(args['data'].copy().subset(valid_data), grids, args['repeat_dt'], args['repeat_res'])

    # subset the data based on the valid mask
    data = args['data'].copy().subset(valid_data)

    # if we have a mask file, use it to subset the data
    # needs to be done after the valid subset because otherwise the interp_mtx for the mask file fails.
    if args['mask_file'] is not None:
        temp = fd_grid([bds['y'], bds['x']],
                       [args['spacing']['z0'], args['spacing']['z0']],
                       name='z0',
                       srs_WKT=args['srs_WKT'],
                       mask_file=args['mask_file'])
        data_mask = lin_op(temp, name='interp_z').interp_mtx(
            data.coords()[0:2]).toCSR().dot(grids['z0'].mask.ravel())
        data_mask[~np.isfinite(data_mask)] = 0
        if np.any(data_mask == 0):
            data.subset(~(data_mask == 0))
            valid_data[valid_data] = ~(data_mask == 0)

    # define the interpolation operator, equal to the sum of the dz and z0 operators
    G_data = lin_op(grids['z0'],
                    name='interp_z').interp_mtx(data.coords()[0:2])
    G_data.add(lin_op(grids['dz'], name='interp_dz').interp_mtx(data.coords()))

    # define the smoothness constraints
    grad2_z0 = lin_op(grids['z0'], name='grad2_z0').grad2(DOF='z0')
    grad2_dz = lin_op(grids['dz'], name='grad2_dzdt').grad2_dzdt(DOF='z',
                                                                 t_lag=1)
    grad_dzdt = lin_op(grids['dz'], name='grad_dzdt').grad_dzdt(DOF='z',
                                                                t_lag=1)
    constraint_op_list = [grad2_z0, grad2_dz, grad_dzdt]
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        d2z_dt2 = lin_op(grids['dz'], name='d2z_dt2').d2z_dt2(DOF='z')
        constraint_op_list.append(d2z_dt2)

    # if bias params are given, create a set of parameters to estimate them
    if args['bias_params'] is not None:
        data, bias_model = assign_bias_ID(data, args['bias_params'])
        G_bias, Gc_bias, Cvals_bias, bias_model = param_bias_matrix(
            data,
            bias_model,
            bias_param_name='bias_ID',
            col_0=grids['dz'].col_N)
        G_data.add(G_bias)
        constraint_op_list.append(Gc_bias)

    # put the equations together
    Gc = lin_op(None, name='constraints').vstack(constraint_op_list)
    N_eq = G_data.N_eq + Gc.N_eq

    # put together all the errors
    Ec = np.zeros(Gc.N_eq)
    root_delta_V_dz = np.sqrt(np.prod(grids['dz'].delta))
    root_delta_A_z0 = np.sqrt(np.prod(grids['z0'].delta))
    Ec[Gc.TOC['rows']['grad2_z0']] = args['E_RMS'][
        'd2z0_dx2'] / root_delta_A_z0 * grad2_z0.mask_for_ind0(
            args['mask_scale'])
    Ec[Gc.TOC['rows']['grad2_dzdt']] = args['E_RMS'][
        'd3z_dx2dt'] / root_delta_V_dz * grad2_dz.mask_for_ind0(
            args['mask_scale'])
    Ec[Gc.TOC['rows']['grad_dzdt']] = args['E_RMS'][
        'd2z_dxdt'] / root_delta_V_dz * grad_dzdt.mask_for_ind0(
            args['mask_scale'])
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        Ec[Gc.TOC['rows']
           ['d2z_dt2']] = args['E_RMS']['d2z_dt2'] / root_delta_V_dz
    if args['bias_params'] is not None:
        Ec[Gc.TOC['rows'][Gc_bias.name]] = Cvals_bias
    Ed = data.sigma.ravel()
    # calculate the inverse square root of the data covariance matrix
    TCinv = sp.dia_matrix((1. / np.concatenate((Ed, Ec)), 0),
                          shape=(N_eq, N_eq))

    # define the right hand side of the equation
    rhs = np.zeros([N_eq])
    rhs[0:data.size] = data.z.ravel()

    # put the fit and constraint matrices together
    Gcoo = sp.vstack([G_data.toCSR(), Gc.toCSR()]).tocoo()
    cov_rows = G_data.N_eq + np.arange(Gc.N_eq)

    # define the matrix that sets dz[reference_epoch]=0 by removing columns from the solution:
    # Find the rows and columns that match the reference epoch
    temp_r, temp_c = np.meshgrid(np.arange(0, grids['dz'].shape[0]),
                                 np.arange(0, grids['dz'].shape[1]))
    z02_mask = grids['dz'].global_ind([
        temp_r.transpose().ravel(),
        temp_c.transpose().ravel(),
        args['reference_epoch'] + np.zeros_like(temp_r).ravel()
    ])

    # Identify all of the DOFs that do not include the reference epoch
    cols = np.arange(G_data.col_N, dtype='int')
    include_cols = np.setdiff1d(cols, z02_mask)
    # Generate a matrix that has diagonal elements corresponding to all DOFs except the reference epoch.
    # Multiplying this by a matrix with columns for all model parameters yields a matrix with no columns
    # corresponding to the reference epoch.
    Ip_c = sp.coo_matrix((np.ones_like(include_cols),
                          (include_cols, np.arange(include_cols.size))),
                         shape=(Gc.col_N, include_cols.size)).tocsc()

    # eliminate the columns for the model variables that are set to zero
    Gcoo = Gcoo.dot(Ip_c)
    timing['setup'] = time() - tic

    # debugging check: warn about implausibly large elevations
    if np.any(data.z > 2500):
        print('outlier!')
    # initialize the book-keeping matrices for the inversion
    m0 = np.zeros(Ip_c.shape[0])
    if "three_sigma_edit" in data.list_of_fields:
        inTSE = np.where(data.three_sigma_edit)[0]
    else:
        inTSE = np.arange(G_data.N_eq, dtype=int)
    if args['VERBOSE']:
        print("initial: %d:" % G_data.r.max())
    tic_iteration = time()
    for iteration in range(args['max_iterations']):
        # build the parsing matrix that removes invalid rows
        Ip_r = sp.coo_matrix(
            (np.ones(Gc.N_eq + inTSE.size),
             (np.arange(Gc.N_eq + inTSE.size), np.concatenate(
                 (inTSE, cov_rows)))),
            shape=(Gc.N_eq + inTSE.size, Gcoo.shape[0])).tocsc()

        m0_last = m0
        if args['VERBOSE']:
            print("starting qr solve for iteration %d" % iteration)
        # solve the equations
        tic = time()
        m0 = Ip_c.dot(
            sparseqr.solve(Ip_r.dot(TCinv.dot(Gcoo)),
                           Ip_r.dot(TCinv.dot(rhs))))
        timing['sparseqr_solve'] = time() - tic

        # quit if the solution is too similar to the previous solution
        if (np.max(np.abs(
            (m0_last - m0)[Gc.TOC['cols']['dz']])) < 0.05) and (iteration > 2):
            break

        # calculate the full data residual
        rs_data = (data.z - G_data.toCSR().dot(m0)) / data.sigma
        # calculate the robust standard deviation of the scaled residuals for the selected data
        sigma_hat = RDE(rs_data[inTSE])
        inTSE_last = inTSE
        # select the data that are within 3*sigma of the solution
        inTSE = np.where(np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat))[0]
        if args['VERBOSE']:
            print('found %d in TSE, sigma_hat=%3.3f' % (inTSE.size, sigma_hat))
        if (sigma_hat <= 1 or
            (inTSE.size == inTSE_last.size
             and np.all(inTSE_last == inTSE))) and (iteration > 2):
            if args['VERBOSE']:
                print("sigma_hat LT 1, exiting")
            break
    timing['iteration'] = time() - tic_iteration
    inTSE = inTSE_last
    valid_data[valid_data] = (np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat))
    data.assign(
        {'three_sigma_edit': np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat)})
    # report the model-based estimate of the data points
    data.assign({'z_est': np.reshape(G_data.toCSR().dot(m0), data.shape)})

    # reshape the components of m to the grid shapes
    m['z0'] = np.reshape(m0[Gc.TOC['cols']['z0']], grids['z0'].shape)
    m['dz'] = np.reshape(m0[Gc.TOC['cols']['dz']], grids['dz'].shape)

    # calculate height rates
    for lag in args['dzdt_lags']:
        this_name = 'dzdt_lag%d' % lag
        m[this_name] = lin_op(grids['dz'], name='dzdt',
                              col_N=G_data.col_N).dzdt(lag=lag).grid_prod(m0)

    # build a matrix that takes the average of the central W_ctr x W_ctr portion of the delta-z grid
    XR = np.mean(grids['z0'].bds[0]) + np.array([-1., 1.]) * args['W_ctr'] / 2.
    YR = np.mean(grids['z0'].bds[1]) + np.array([-1., 1.]) * args['W_ctr'] / 2.
    center_dzbar = lin_op(grids['dz'], name='center_dzbar',
                          col_N=G_data.col_N).vstack([
                              lin_op(grids['dz']).mean_of_bounds(
                                  (XR, YR, [season, season]))
                              for season in grids['dz'].ctrs[2]
                          ])
    G_dzbar = center_dzbar.toCSR()
    # calculate the grid mean of dz
    m['dz_bar'] = G_dzbar.dot(m0)

    # build a matrix that takes the lagged temporal derivative of dzbar (e.g. quarterly dzdt, annual dzdt)
    for lag in args['dzdt_lags']:
        this_name = 'dzdt_bar_lag%d' % lag
        this_op = lin_op(grids['t'], name=this_name).diff(lag=lag).toCSR()
        # calculate the grid mean of dz/dt
        m[this_name] = this_op.dot(m['dz_bar'].ravel())

    # report the parameter biases.  Sorted in order of the parameter bias arguments
    #???
    if args['bias_params'] is not None:
        m['bias'] = parse_biases(m0, bias_model['bias_ID_dict'],
                                 args['bias_params'])

    # report the entire model vector, just in case we want it.

    m['all'] = m0

    # report the geolocation of the output map
    m['extent'] = np.concatenate((grids['z0'].bds[1], grids['z0'].bds[0]))

    # parse the residuals to assess the contributions of the total error:
    # Make the C matrix for the constraints
    TCinv_cov = sp.dia_matrix((1. / Ec, 0), shape=(Gc.N_eq, Gc.N_eq))
    rc = TCinv_cov.dot(Gc.toCSR().dot(m0))
    ru = Gc.toCSR().dot(m0)
    R = dict()
    RMS = dict()
    for eq_type in ['d2z_dt2', 'grad2_z0', 'grad2_dzdt']:
        if eq_type in Gc.TOC['rows']:
            R[eq_type] = np.sum(rc[Gc.TOC['rows'][eq_type]]**2)
            RMS[eq_type] = np.sqrt(np.mean(ru[Gc.TOC['rows'][eq_type]]**2))
    R['data'] = np.sum(((data.z_est - data.z) / data.sigma)**2)
    RMS['data'] = np.sqrt(np.mean((data.z_est - data.z)**2))

    # if we need to compute the errors in the solution, continue
    if args['compute_E']:
        tic = time()
        # take the QZ transform of Gcoo
        # use Rqz for the triangular factor so the residual dict R (defined above) is not overwritten
        z, Rqz, perm, rank = sparseqr.qz(Ip_r.dot(TCinv.dot(Gcoo)),
                                         Ip_r.dot(TCinv.dot(rhs)))
        z = z.ravel()
        Rqz = Rqz.tocsr()
        Rqz.sort_indices()
        Rqz.eliminate_zeros()
        timing['decompose_qz'] = time() - tic

        E0 = np.zeros(Rqz.shape[0])

        # compute Rinv for use in propagating errors.
        # what should the tolerance be?  We will eventually square Rinv and take its
        # row-wise sum.  We care about errors at the cm level, so
        # size(Rinv)*tol^2 = 0.01 -> tol=sqrt(0.01/size(Rinv))~ 1E-4
        tic = time()
        RR, CC, VV, status = inv_tr_upper(Rqz, int(np.prod(Rqz.shape) / 4),
                                          1.e-5)
        # save Rinv as a sparse array.  The syntax perm[RR] undoes the permutation from QZ
        Rinv = sp.coo_matrix((VV, (perm[RR], CC)), shape=Rqz.shape).tocsr()
        timing['Rinv_cython'] = time() - tic
        tic = time()
        E0 = np.sqrt(Rinv.power(2).sum(axis=1))
        timing['propagate_errors'] = time() - tic

        # generate the full E vector.  E0 comes back as a numpy matrix, so cast it to a flat ndarray
        E0 = np.array(Ip_c.dot(E0)).ravel()
        E['z0'] = np.reshape(E0[Gc.TOC['cols']['z0']], grids['z0'].shape)
        E['dz'] = np.reshape(E0[Gc.TOC['cols']['dz']], grids['dz'].shape)

        # generate the lagged dz errors:

        for lag in args['dzdt_lags']:
            this_name = 'dzdt_lag%d' % lag
            E[this_name] = lin_op(grids['dz'],
                                  name=this_name,
                                  col_N=G_data.col_N).dzdt(lag=lag).grid_error(
                                      Ip_c.dot(Rinv))

            this_name = 'dzdt_bar_lag%d' % lag
            this_op = lin_op(grids['t'], name=this_name).diff(lag=lag).toCSR()
            E[this_name] = np.sqrt(
                (this_op.dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))
        # calculate the grid mean of dz/dt

        # generate the season-to-season errors
        #E['dzdt_qyr']=lin_op(grids['dz'], name='dzdt_1yr', col_N=G_data.col_N).dzdt().grid_error(Ip_c.dot(Rinv))

        # generate the annual errors
        #E['dzdt_1yr']=lin_op(grids['dz'], name='dzdt_1yr', col_N=G_data.col_N).dzdt(lag=4).grid_error(Ip_c.dot(Rinv))

        # generate the grid-mean error
        E['dz_bar'] = np.sqrt(
            (G_dzbar.dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))

        # generate the grid-mean quarterly dzdt error
        #E['dzdt_bar_qyr']=np.sqrt((ddt_qyr.dot(G_dzbar).dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))

        # generate the grid-mean annual dzdt error
        #E['dzdt_bar_1yr']=np.sqrt((ddt_1yr.dot(G_dzbar).dot(Ip_c).dot(Rinv)).power(2).sum(axis=1))

        # report the rgt bias errors.  Sorted by RGT, then by  cycle
        if args['bias_params'] is not None:
            E['bias'] = parse_biases(E0, bias_model['bias_ID_dict'],
                                     args['bias_params'])

    TOC = Gc.TOC
    return {
        'm': m,
        'E': E,
        'data': data,
        'grids': grids,
        'valid_data': valid_data,
        'TOC': TOC,
        'R': R,
        'RMS': RMS,
        'timing': timing,
        'E_RMS': args['E_RMS']
    }
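
A minimal calling sketch (hedged): only data, W, ctr, spacing, and E_RMS are required keywords; 'data' is assumed to be an existing point-data object, the E_RMS values follow the pattern of Example #9, and all other numbers are placeholders.

E_RMS = {'d2z0_dx2': 20000./3000/3000, 'd3z_dx2dt': 10./3000/3000,
         'd2z_dxdt': 100./3000, 'd2z_dt2': 1}
S = smooth_xyt_fit(data=data,
                   W={'x': 6.e4, 'y': 6.e4, 't': 6},
                   ctr={'x': -1.5e5, 'y': -2.e6, 't': 2006.0},
                   spacing={'z0': 5.e2, 'dz': 5.e3, 'dt': 0.25},
                   E_RMS=E_RMS,
                   reference_epoch=12, max_iterations=5, compute_E=False)
z0_fit, dz_fit = S['m']['z0'], S['m']['dz']    # fitted DEM and height-change grids
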
Example #7
from LSsurf.fd_grid import fd_grid
from LSsurf.lin_op import lin_op
import scipy.sparse as sp
import matplotlib.pyplot as plt
import numpy as np
from LSsurf.smooth_xytb_fit import sum_cell_area
from LSsurf.smooth_xytb_fit import calc_cell_area
from LSsurf.smooth_xytb_fit import setup_averaging_ops

xc = np.array([0, -5.e5])
deltas = [100., 100.]
bounds = [xc[1] + np.array([-3.e4, 3.e4]), xc[0] + np.array([-3.e4, 3.e4])]
srs_proj4 = '+proj=stere +lat_0=90 +lat_ts=70 +lon_0=-45 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs '

# fine 2-D grid for z0, and coarser 3-D (y, x, t) grids for dz at 1 km and 10 km resolution
grid_z0 = fd_grid(bounds, deltas, srs_proj4=srs_proj4)
grid_dz = fd_grid(bounds + [np.array([0, 5])], [1.e3, 1.e3, 0.25])
grid_10km = fd_grid(bounds + [np.array([0, 5])], [1.e4, 1.e4, 0.25])

# build a binary mask on the z0 grid: valid (1) only for columns to the left of the domain center
mask = np.zeros(grid_z0.ctrs[0].size * np.array([1, 1]))
mask[:, grid_z0.ctrs[1] < np.mean(grid_z0.ctrs[1]) - 150] = 1
grid_z0.mask = mask

# cell areas on the fine (z0) grid, and the masked fine-grid areas summed onto the coarser dz grid
cell_area_0 = calc_cell_area(grid_z0)
cell_area_1, op = sum_cell_area(grid_z0, grid_dz, return_op=True)

# set up 10-km averaging operators for the dz grid, using the summed cell areas
args = {'avg_scales': [1.e4], 'dzdt_lags': [1, 4]}

ops = setup_averaging_ops(grid_dz, grid_dz.col_N, args, cell_area=cell_area_1)
Example #8
def smooth_xyt_fit(**kwargs):
    required_fields=('data','W','ctr','spacing','E_RMS')
    args={'reference_epoch':0,
    'W_ctr':1e4,
    'mask_file':None,
    'mask_scale':None,
    'compute_E':False,
    'max_iterations':10,
    'srs_proj4': None,
    'N_subset': None,
    'bias_params': None,
    'repeat_res':None,
    'converge_tol_dz':0.05,
    'repeat_dt': 1,
    'Edit_only': False,
    'dzdt_lags':[1, 4],
    'data_slope_sensors':None,
    'E_slope':0.05,
    'VERBOSE': True}
    args.update(kwargs)
    for field in required_fields:
        if field not in kwargs:
            raise ValueError("%s must be defined" % field)
    valid_data = np.isfinite(args['data'].z) & np.isfinite(args['data'].sigma)
    timing=dict()

    if args['N_subset'] is not None:
        tic=time()
        valid_data &= edit_data_by_subset_fit(args['N_subset'], args)
        timing['edit_by_subset']=time()-tic
        if args['Edit_only']:
            return {'timing':timing, 'data':args['data'].copy()[valid_data]}
    m={}
    E={}
    R={}
    RMS={}

    # define the grids
    tic=time()
    bds={coord:args['ctr'][coord]+np.array([-0.5, 0.5])*args['W'][coord] for coord in ('x','y','t')}
    grids=dict()
    grids['z0']=fd_grid( [bds['y'], bds['x']], args['spacing']['z0']*np.ones(2),\
         name='z0', srs_proj4=args['srs_proj4'], mask_file=args['mask_file'])
    grids['dz']=fd_grid( [bds['y'], bds['x'], bds['t']], \
        [args['spacing']['dz'], args['spacing']['dz'], args['spacing']['dt']], \
         name='dz', col_0=grids['z0'].N_nodes, srs_proj4=args['srs_proj4'], \
        mask_file=args['mask_file'])
    grids['z0'].col_N=grids['dz'].col_N
    grids['t']=fd_grid([bds['t']], [args['spacing']['dt']], name='t')

    # select only the data points that are within the grid bounds
    valid_z0=grids['z0'].validate_pts((args['data'].coords()[0:2]))
    valid_dz=grids['dz'].validate_pts((args['data'].coords()))
    valid_data=valid_data & valid_dz & valid_z0
    
    if not np.any(valid_data):
        return {'m':m, 'E':E, 'data':None, 'grids':grids, 'valid_data': valid_data, 'TOC':{},'R':{}, 'RMS':{}, 'timing':timing,'E_RMS':args['E_RMS']}

    # if repeat_res is given, resample the data to include only repeat data (to within a spatial tolerance of repeat_res)
    if args['repeat_res'] is not None:
        N_before_repeat=np.sum(valid_data)   
        valid_data[valid_data]=valid_data[valid_data] & \
            select_repeat_data(args['data'].copy_subset(valid_data), grids, args['repeat_dt'], args['repeat_res'], reference_time=grids['t'].ctrs[0][args['reference_epoch']])
        if args['VERBOSE']:
            print("before repeat editing found %d data" % N_before_repeat)
            print("after repeat editing found %d data" % valid_data.sum())

    # subset the data based on the valid mask
    data=args['data'].copy_subset(valid_data)

    # if we have a mask file, use it to subset the data
    # needs to be done after the valid subset because otherwise the interp_mtx for the mask file fails.
    if args['mask_file'] is not None:
        temp=fd_grid( [bds['y'], bds['x']], [args['spacing']['z0'], args['spacing']['z0']], name='z0', srs_proj4=args['srs_proj4'], mask_file=args['mask_file'])
        data_mask=lin_op(temp, name='interp_z').interp_mtx(data.coords()[0:2]).toCSR().dot(grids['z0'].mask.ravel())
        data_mask[~np.isfinite(data_mask)]=0
        if np.any(data_mask==0):
            data.index(~(data_mask==0))
            valid_data[valid_data]= ~(data_mask==0)

    # Check if we have any data.  If not, quit
    if data.size==0:
        return {'m':m, 'E':E, 'data':data, 'grids':grids, 'valid_data': valid_data, 'TOC':{},'R':{}, 'RMS':{}, 'timing':timing,'E_RMS':args['E_RMS']}

    # define the interpolation operator, equal to the sum of the dz and z0 operators
    G_data=lin_op(grids['z0'], name='interp_z').interp_mtx(data.coords()[0:2])
    G_data.add(lin_op(grids['dz'], name='interp_dz').interp_mtx(data.coords()))

     # define the smoothness constraints
    grad2_z0=lin_op(grids['z0'], name='grad2_z0').grad2(DOF='z0')
    grad2_dz=lin_op(grids['dz'], name='grad2_dzdt').grad2_dzdt(DOF='z', t_lag=1)
    grad_dzdt=lin_op(grids['dz'], name='grad_dzdt').grad_dzdt(DOF='z', t_lag=1)
    constraint_op_list=[grad2_z0, grad2_dz, grad_dzdt]
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        d2z_dt2=lin_op(grids['dz'], name='d2z_dt2').d2z_dt2(DOF='z')
        constraint_op_list.append(d2z_dt2)

    # if bias params are given, create a set of parameters to estimate them
    if args['bias_params'] is not None:
        data, bias_model=assign_bias_ID(data, args['bias_params'])
        G_bias, Gc_bias, Cvals_bias, bias_model=\
            param_bias_matrix(data, bias_model, bias_param_name='bias_ID', 
                              col_0=grids['dz'].col_N)
        G_data.add(G_bias)
        constraint_op_list.append(Gc_bias)

    if args['data_slope_sensors'] is not None:
        bias_model['E_slope']=args['E_slope']
        G_slope_bias, Gc_slope_bias, Cvals_slope_bias, bias_model= data_slope_bias(data,  bias_model, sensors=args['data_slope_sensors'],  col_0=G_data.col_N)
        G_data.add(G_slope_bias)
        constraint_op_list.append(Gc_slope_bias)
    # put the equations together
    Gc=lin_op(None, name='constraints').vstack(constraint_op_list)
    N_eq=G_data.N_eq+Gc.N_eq

    # put together all the errors
    Ec=np.zeros(Gc.N_eq)
    root_delta_V_dz=np.sqrt(np.prod(grids['dz'].delta))
    root_delta_A_z0=np.sqrt(np.prod(grids['z0'].delta))
    Ec[Gc.TOC['rows']['grad2_z0']]=args['E_RMS']['d2z0_dx2']/root_delta_A_z0*grad2_z0.mask_for_ind0(args['mask_scale'])
    Ec[Gc.TOC['rows']['grad2_dzdt']]=args['E_RMS']['d3z_dx2dt']/root_delta_V_dz*grad2_dz.mask_for_ind0(args['mask_scale'])
    Ec[Gc.TOC['rows']['grad_dzdt']]=args['E_RMS']['d2z_dxdt']/root_delta_V_dz*grad_dzdt.mask_for_ind0(args['mask_scale'])
    if 'd2z_dt2' in args['E_RMS'] and args['E_RMS']['d2z_dt2'] is not None:
        Ec[Gc.TOC['rows']['d2z_dt2']]=args['E_RMS']['d2z_dt2']/root_delta_V_dz
    if args['bias_params'] is not None:
        Ec[Gc.TOC['rows'][Gc_bias.name]] = Cvals_bias
    if args['data_slope_sensors'] is not None:
        Ec[Gc.TOC['rows'][Gc_slope_bias.name]] = Cvals_slope_bias
    Ed=data.sigma.ravel()
    # calculate the inverse square root of the data covariance matrix
    TCinv=sp.dia_matrix((1./np.concatenate((Ed, Ec)), 0), shape=(N_eq, N_eq))

    # define the right hand side of the equation
    rhs=np.zeros([N_eq])
    rhs[0:data.size]=data.z.ravel()

    # put the fit and constraint matrices together
    Gcoo=sp.vstack([G_data.toCSR(), Gc.toCSR()]).tocoo()
    cov_rows=G_data.N_eq+np.arange(Gc.N_eq)
     
    # build a matrix that takes the average of the center of the delta-z grid
    # this gets used both in the averaging and error-calculation codes
    XR=np.mean(grids['z0'].bds[0])+np.array([-1., 1.])*args['W_ctr']/2.
    YR=np.mean(grids['z0'].bds[1])+np.array([-1., 1.])*args['W_ctr']/2.
    center_dzbar=lin_op(grids['dz'], name='center_dzbar', col_N=G_data.col_N).vstack([lin_op(grids['dz']).mean_of_bounds((XR, YR, [season, season] )) for season in grids['dz'].ctrs[2]])
    G_dzbar=center_dzbar.toCSR()

    # define the matrix that sets dz[reference_epoch]=0 by removing columns from the solution:
    # Find the rows and columns that match the reference epoch
    temp_r, temp_c=np.meshgrid(np.arange(0, grids['dz'].shape[0]), np.arange(0, grids['dz'].shape[1]))
    z02_mask=grids['dz'].global_ind([temp_r.transpose().ravel(), temp_c.transpose().ravel(),\
                  args['reference_epoch']+np.zeros_like(temp_r).ravel()])

    # Identify all of the DOFs that do not include the reference epoch
    cols=np.arange(G_data.col_N, dtype='int')
    include_cols=np.setdiff1d(cols, z02_mask)
    # Generate a matrix that has diagonal elements corresponding to all DOFs except the reference epoch.
    # Multiplying this by a matrix with columns for all model parameters yields a matrix with no columns
    # corresponding to the reference epoch.
    Ip_c=sp.coo_matrix((np.ones_like(include_cols), (include_cols, np.arange(include_cols.size))), \
                       shape=(Gc.col_N, include_cols.size)).tocsc()

    # eliminate the columns for the model variables that are set to zero
    Gcoo=Gcoo.dot(Ip_c)
    timing['setup']=time()-tic

    # initialize the book-keeping matrices for the inversion
    m0=np.zeros(Ip_c.shape[0])
    if "three_sigma_edit" in data.fields:
        inTSE=np.flatnonzero(data.three_sigma_edit)
    else:
        inTSE=np.arange(G_data.N_eq, dtype=int)
    inTSE_last = np.zeros([0])
    if args['VERBOSE']:
        print("initial: %d:" % G_data.r.max())
    tic_iteration=time()
    for iteration in range(args['max_iterations']):
        # build the parsing matrix that removes invalid rows
        Ip_r=sp.coo_matrix((np.ones(Gc.N_eq+inTSE.size), (np.arange(Gc.N_eq+inTSE.size), np.concatenate((inTSE, cov_rows)))), \
                           shape=(Gc.N_eq+inTSE.size, Gcoo.shape[0])).tocsc()

        m0_last=m0
        if args['VERBOSE']:
            print("starting qr solve for iteration %d" % iteration)
        # solve the equations
        tic=time(); 
        m0=Ip_c.dot(sparseqr.solve(Ip_r.dot(TCinv.dot(Gcoo)), Ip_r.dot(TCinv.dot(rhs)))); 
        timing['sparseqr_solve']=time()-tic

        # calculate the full data residual
        rs_data=(data.z-G_data.toCSR().dot(m0))/data.sigma
        # calculate the robust standard deviation of the scaled residuals for the selected data
        sigma_hat=RDE(rs_data[inTSE])
        
        # select the data that have scaled residuals < 3 *max(1, sigma_hat)
        inTSE_last=inTSE
        inTSE = np.flatnonzero(np.abs(rs_data) < 3.0 * np.maximum(1, sigma_hat))
        
        # quit if the solution is too similar to the previous solution
        if (np.max(np.abs((m0_last-m0)[Gc.TOC['cols']['dz']])) < args['converge_tol_dz']) and (iteration > 2):
            if args['VERBOSE']:
                print("Solution identical to previous iteration with tolerance %3.1f, exiting after iteration %d" % (args['converge_tol_dz'], iteration))
            break
        # select the data that are within 3*sigma of the solution
        if args['VERBOSE']:
            print('found %d in TSE, sigma_hat=%3.3f' % ( inTSE.size, sigma_hat ))
        if iteration > 0:
            if inTSE.size == inTSE_last.size and np.all( inTSE_last == inTSE ):
                if args['VERBOSE']:
                    print("filtering unchanged, exiting after iteration %d" % iteration)
                break 
        if iteration >= 2:
            if sigma_hat <= 1:
                if args['VERBOSE']:
                    print("sigma_hat LT 1, exiting after iteration %d" % iteration)
                break             

    # if we've done any iterations, parse the model and the data residuals
    if args['max_iterations'] > 0:
        timing['iteration']=time()-tic_iteration
        inTSE=inTSE_last
        valid_data[valid_data]=(np.abs(rs_data)<3.0*np.maximum(1, sigma_hat))
        data.assign({'three_sigma_edit':np.abs(rs_data)<3.0*np.maximum(1, sigma_hat)})
        # report the model-based estimate of the data points
        data.assign({'z_est':np.reshape(G_data.toCSR().dot(m0), data.shape)})
        parse_model(m, m0, G_data, G_dzbar, Gc.TOC, grids, args['bias_params'], bias_model, dzdt_lags=args['dzdt_lags'])
        # parse the residuals to assess the contributions of the total error:
        # Make the C matrix for the constraints
        TCinv_cov=sp.dia_matrix((1./Ec, 0), shape=(Gc.N_eq, Gc.N_eq))
        rc=TCinv_cov.dot(Gc.toCSR().dot(m0))
        ru=Gc.toCSR().dot(m0)
        for eq_type in ['d2z_dt2','grad2_z0','grad2_dzdt']:
            if eq_type in Gc.TOC['rows']:
                R[eq_type]=np.sum(rc[Gc.TOC['rows'][eq_type]]**2)
                RMS[eq_type]=np.sqrt(np.mean(ru[Gc.TOC['rows'][eq_type]]**2))
    R['data']=np.sum((((data.z_est[data.three_sigma_edit==1]-data.z[data.three_sigma_edit==1])/data.sigma[data.three_sigma_edit==1])**2))
    RMS['data']=np.sqrt(np.mean((data.z_est[data.three_sigma_edit==1]-data.z[data.three_sigma_edit==1])**2))

    # Compute the error in the solution if requested
    if args['compute_E']:
        # We have generally not done any iterations at this point, so need to make the Ip_r matrix
        Ip_r=sp.coo_matrix((np.ones(Gc.N_eq+inTSE.size), (np.arange(Gc.N_eq+inTSE.size), np.concatenate((inTSE, cov_rows)))), \
                           shape=(Gc.N_eq+inTSE.size, Gcoo.shape[0])).tocsc()
        parse_errors(E, Gcoo, TCinv, rhs, Ip_c, Ip_r, grids, G_data, Gc, G_dzbar, \
                         bias_model, args['bias_params'], dzdt_lags=args['dzdt_lags'], timing=timing)

 

    TOC=Gc.TOC
    return {'m':m, 'E':E, 'data':data, 'grids':grids, 'valid_data': valid_data, 'TOC':TOC,'R':R, 'RMS':RMS, 'timing':timing,'E_RMS':args['E_RMS'], 'dzdt_lags':args['dzdt_lags']}
Example #9
File: glas_dhdt.py  Project: whigg/LSsurf
def glas_fit(W0, xy0=np.array((-150000, -2000000)), D=None, E_RMS=None, gI=None, giFile='/Data/glas/GL/rel_634/GeoIndex.h5'):
    if gI is None:
        gI=geo_index().from_file(giFile)

    #import_D=False; print("WARNING::::::REUSING D")
    timing=dict()
    xy0=np.array((-150000, -2000000))
    E_RMS={'d2z0_dx2':20000./3000/3000, 'd3z_dx2dt':10./3000/3000, 'd2z_dxdt':100/3000, 'd2z_dt2':1}

    W={'x':W0, 'y':W0,'t':6}
    # define the domain center before 'args' so that it can be referenced below
    ctr={'x':xy0[0], 'y':xy0[1], 't':(2003+2009)/2. }
    spacing={'z0':5.e2, 'dzdt':5.e3}

    args={'W':W, 'ctr':ctr, 'spacing':spacing, 'E_RMS':E_RMS, 'max_iterations':25}


    if D is None:
        fields=[ 'IceSVar', 'deltaEllip', 'numPk', 'ocElv', 'reflctUC', 'satElevCorr',  'time',  'x', 'y', 'z']

        D=gI.query_xy_box(xy0[0]+np.array([-W['x']/2, W['x']/2]), xy0[1]+np.array([-W['y']/2, W['y']/2]), fields=fields)

        #plt.plot(xy[0], xy[1],'.')
        #plt.plot(xy0[0], xy0[1],'r*')

        D.assign({'year': matlabToYear(D.time)})
        good=(D.IceSVar < 0.035) & (D.reflctUC >0.05) & (D.satElevCorr < 1) & (D.numPk==1)
        D.subset(good, datasets=['x','y','z','year'])

        D.assign({'sigma':np.zeros_like(D.x)+0.2, 'time':D.year})
        plt.plot(D.x, D.y,'m.')

    bds={coord:args['ctr'][coord]+np.array([-0.5, 0.5])*args['W'][coord] for coord in ('x','y')}
    grids=dict()
    grids['z0']=fd_grid( [bds['y'], bds['x']], args['spacing']['z0']*np.ones(2), name='z0')
    grids['dzdt']=fd_grid( [bds['y'], bds['x']],  args['spacing']['dzdt']*np.ones(2), \
         col_0=grids['z0'].col_N+1, name='dzdt')

    valid_z0=grids['z0'].validate_pts((D.coords()[0:2]))
    valid_dz=grids['dzdt'].validate_pts((D.coords()))
    valid_data=valid_dz & valid_z0
    D=D.subset(valid_data)

    G_data=lin_op(grids['z0'], name='interp_z').interp_mtx(D.coords()[0:2])
    G_dzdt=lin_op(grids['dzdt'], name='dzdt').interp_mtx(D.coords()[0:2])
    G_dzdt.v *= (D.year[G_dzdt.r.astype(int)]-ctr['t'])
    G_data.add(G_dzdt)

    grad2_z0=lin_op(grids['z0'], name='grad2_z0').grad2(DOF='z0')
    grad_z0=lin_op(grids['z0'], name='grad_z0').grad(DOF='z0')
    grad2_dzdt=lin_op(grids['dzdt'], name='grad2_dzdt').grad2(DOF='dzdt')
    grad_dzdt=lin_op(grids['dzdt'], name='grad_dzdt').grad(DOF='dzdt')
    Gc=lin_op(None, name='constraints').vstack((grad2_z0, grad_z0, grad2_dzdt, grad_dzdt))
    Ec=np.zeros(Gc.N_eq)
    root_delta_A_z0=np.sqrt(np.prod(grids['z0'].delta))
    Ec[Gc.TOC['rows']['grad2_z0']]=args['E_RMS']['d2z0_dx2']/root_delta_A_z0
    Ec[Gc.TOC['rows']['grad2_dzdt']]=args['E_RMS']['d3z_dx2dt']/root_delta_A_z0
    Ec[Gc.TOC['rows']['grad_z0']]=1.e4*args['E_RMS']['d2z0_dx2']/root_delta_A_z0
    Ec[Gc.TOC['rows']['grad_dzdt']]=1.e4*args['E_RMS']['d3z_dx2dt']/root_delta_A_z0

    Ed=D.sigma.ravel()

    N_eq=G_data.N_eq+Gc.N_eq

    # calculate the inverse square root of the data covariance matrix
    TCinv=sp.dia_matrix((1./np.concatenate((Ed, Ec)), 0), shape=(N_eq, N_eq))

    # define the right hand side of the equation
    rhs=np.zeros([N_eq])
    rhs[0:D.x.size]=D.z.ravel()

    # put the fit and constraint matrices together
    Gcoo=sp.vstack([G_data.toCSR(), Gc.toCSR()]).tocoo()
    cov_rows=G_data.N_eq+np.arange(Gc.N_eq)

    # initialize the book-keeping matrices for the inversion
    m0=np.zeros(Gcoo.shape[1])
    inTSE=np.arange(G_data.N_eq, dtype=int)

    for iteration in range(args['max_iterations']):
        # build the parsing matrix that removes invalid rows
        Ip_r=sp.coo_matrix((np.ones(Gc.N_eq+inTSE.size), (np.arange(Gc.N_eq+inTSE.size), np.concatenate((inTSE, cov_rows)))), shape=(Gc.N_eq+inTSE.size, Gcoo.shape[0])).tocsc()

        m0_last=m0
        # solve the equations
        tic=time();
        m0=sparseqr.solve(Ip_r.dot(TCinv.dot(Gcoo)), Ip_r.dot(TCinv.dot(rhs)));
        timing['sparseqr_solve']=time()-tic

        # quit if the solution is too similar to the previous solution
        if np.max(np.abs((m0_last-m0)[Gc.TOC['cols']['dzdt']])) < 0.05:
            break

        # calculate the full data residual
        rs_data=(D.z-G_data.toCSR().dot(m0))/D.sigma
        # calculate the robust standard deviation of the scaled residuals for the selected data
        sigma_hat=RDE(rs_data[inTSE])
        inTSE_last=inTSE
        # select the data that are within 3*sigma of the solution
        inTSE=np.where(np.abs(rs_data)<3.0*sigma_hat)[0]
        print('found %d in TSE, sigma_hat=%3.3f' % (inTSE.size, sigma_hat))
        if sigma_hat <= 1 or( inTSE.size == inTSE_last.size and np.all( inTSE_last == inTSE )):
            break
    m=dict()
    m['z0']=m0[Gc.TOC['cols']['z0']].reshape(grids['z0'].shape)
    m['dzdt']=m0[Gc.TOC['cols']['dzdt']].reshape(grids['dzdt'].shape)
    if DOPLOT:
        plt.subplot(121)
        plt.imshow(m['z0'])
        plt.colorbar()
        plt.subplot(122)
        plt.imshow(m['dzdt'])
        plt.colorbar()

    if False:
        plt.figure()
        Dfinal=D.subset(inTSE)
        ii=np.argsort(Dfinal.z)
        plt.scatter(Dfinal.x[ii], Dfinal.y[ii], c=Dfinal.z[ii]); plt.colorbar()


    return grids, m, D, inTSE, sigma_hat
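
A minimal calling sketch (hedged): W0 is the domain width in meters and the value shown is a placeholder; the default giFile path must point to an existing GLAS GeoIndex file, or a pre-loaded geo_index can be passed as gI.

grids, m, D, inTSE, sigma_hat = glas_fit(W0=4.e4)
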