Example #1
def snap_array(ds_in,
               ds_snap,
               tx_in,
               tx_snap,
               in_nodata,
               out_nodata,
               mask_val=None):

    ar_in = ds_in.ReadAsArray()
    if mask_val is not None:
        ar_snap = ds_snap.ReadAsArray()
    in_shape = ar_in.shape
    out_shape = ds_snap.RasterYSize, ds_snap.RasterXSize

    offset = calc_offset((tx_snap[0], tx_snap[3]), tx_in)
    snap_inds, in_inds = get_offset_array_indices(out_shape, in_shape, offset)
    np_dtype = ar_in.dtype
    ar = np.full(out_shape, out_nodata, dtype=np_dtype)
    ar_in[ar_in == in_nodata] = out_nodata
    ar[snap_inds[0]:snap_inds[1],
       snap_inds[2]:snap_inds[3]] = ar_in[in_inds[0]:in_inds[1],
                                          in_inds[2]:in_inds[3]]

    if mask_val is not None:
        mask_val = int(mask_val)
        ar[ar_snap == mask_val] = out_nodata

    return ar
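A minimal sketch of how snap_array might be called; the raster paths and nodata values below are placeholders, and calc_offset / get_offset_array_indices are assumed to be importable from the same mosaic helper module used in the later examples.

from osgeo import gdal

# Hypothetical inputs: paths and nodata values are placeholders
ds_in = gdal.Open('input.tif')     # raster to be snapped
ds_snap = gdal.Open('snap.tif')    # raster that defines the target grid and extent
ar = snap_array(ds_in, ds_snap,
                ds_in.GetGeoTransform(), ds_snap.GetGeoTransform(),
                in_nodata=-9999, out_nodata=255, mask_val=0)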
Example #2
def main(sample_txt, ref_raster, pred_raster, p_nodata, t_nodata, target_col,
         bins, out_txt, match=None, predict_col=None):
    
    p_nodata = int(p_nodata)
    t_nodata = int(t_nodata)
    
    ds_p = gdal.Open(pred_raster)
    ar_p = ds_p.ReadAsArray()
    
    ds_r = gdal.Open(ref_raster)
    ar_r = ds_r.ReadAsArray()
    
    r_xsize = ds_r.RasterXSize
    r_ysize = ds_r.RasterYSize
    p_xsize = ds_p.RasterXSize
    p_ysize = ds_p.RasterYSize
    tx_r = ds_r.GetGeoTransform()
    tx_p = ds_p.GetGeoTransform()
    # If two arrays are different sizes, make prediction array match reference
    if r_xsize != p_xsize or r_ysize != p_ysize or tx_r != tx_p:
        warnings.warn('Prediction and reference rasters do not share the same extent. Snapping prediction raster to reference....')
        offset = mosaic.calc_offset((tx_r[0], tx_r[3]), tx_p)
        t_inds, p_inds = mosaic.get_offset_array_indices((r_ysize, r_xsize), (p_ysize, p_xsize), offset)
        ar_buf = np.full(ar_r.shape, p_nodata, dtype=ar_p.dtype)
        ar_buf[t_inds[0]:t_inds[1], t_inds[2]:t_inds[3]] = ar_p[p_inds[0]:p_inds[1], p_inds[2]:p_inds[3]]
        ar_p = ar_buf.copy()
        del ar_buf
        
    bins = parse_bins(bins)
    
    sample = pd.read_csv(sample_txt, sep='\t')
    if target_col in sample.columns:
        t_sample = sample[target_col]
    else:
        raise IndexError('target_col "%s" not in sample' % target_col)
    
    if match:
        t_sample, p_sample = get_samples(ar_p, ar_r, p_nodata, t_nodata, sample, match=match)
    elif predict_col:
        p_sample = sample[predict_col]
    else:
        p_sample = ar_p[sample.row, sample.col]
        t_sample = ar_r[sample.row, sample.col]
    
    rmse = area_weighted_rmse(ar_p, ar_r, p_sample, t_sample, bins, p_nodata, out_txt=out_txt)
    
    return rmse
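The snapping block above leans on mosaic.calc_offset and mosaic.get_offset_array_indices. A rough sketch of the arithmetic calc_offset is assumed to perform (the prediction raster's origin expressed as a row/column offset within the reference grid) follows; it assumes both rasters share the same pixel size and is illustrative only, not the mosaic module's actual code.

def calc_offset_sketch(ref_ul, tx):
    # ref_ul is the reference upper-left corner (ulx, uly); tx is the GDAL
    # geotransform of the raster being offset. Assumes matching pixel sizes.
    col_off = int(round((tx[0] - ref_ul[0]) / tx[1]))
    row_off = int(round((tx[3] - ref_ul[1]) / tx[5]))
    return row_off, col_off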
Example #3
def main(params,
         ar_p=None,
         out_txt=None,
         inventory_txt=None,
         target_col=None,
         match=False,
         file_stamp=None):
    #p_path, t_path, bins, sample_txt, p_nodata, t_nodata, out_dir, inventory_txt=None

    # Read params and make variables from text
    inputs = read_params(params)
    for i in inputs:
        exec("{0} = str({1})".format(i, inputs[i]))

    # Check that variables were specified in params
    try:
        bins = parse_bins(bins)
        p_nodata = int(p_nodata)
        t_nodata = int(t_nodata)
        str_check = sample_txt  #, target_col
    except NameError as e:
        print ''
        missing_var = str(e).split("'")[1]
        msg = "Variable '%s' not specified in param file:\n%s" % (missing_var,
                                                                  params)
        raise NameError(msg)

    #if out_dir_: # then out_dir came from predict_stem call
    #    out_dir = out_dir_
    #out_txt = os.path.join(out_dir, 'confusion.txt')
    if out_txt:
        out_dir = os.path.dirname(out_txt)
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        shutil.copy2(params, out_dir)

    # If p_path was specified, this call of the function is coming from outside
    #   predict_stem.py. Otherwise, ar_p should be given.
    if 'p_path' in locals():
        print 'Reading in the prediction raster:%s\n' % p_path
        ds_p = gdal.Open(p_path)
        ar_p = ds_p.ReadAsArray()

    ds_t = gdal.Open(t_path)
    band = ds_t.GetRasterBand(1)
    ar_t = band.ReadAsArray()
    #ar_t=ar_t.GetRasterBand(1)
    #print('read in the truth raster')
    t_xsize = ds_t.RasterXSize
    #print('t_xsize is: ', t_xsize)
    t_ysize = ds_t.RasterYSize
    #print('tYsize is: ', t_ysize)
    p_xsize = ds_p.RasterXSize
    #print('p_xsize is: ', p_xsize)
    p_ysize = ds_p.RasterYSize
    #print('p_ysize is: ', p_ysize)
    tx_t = ds_t.GetGeoTransform()
    tx_p = ds_p.GetGeoTransform()
    # If two arrays are different sizes, make prediction array match reference
    if t_xsize != p_xsize or t_ysize != p_ysize or tx_t != tx_p:
        print('entered if statement')
        warnings.warn(
            'Prediction and reference rasters do not share the same extent. Snapping prediction raster to reference....'
        )
        offset = mosaic.calc_offset((tx_t[0], tx_t[3]), tx_p)
        #print(offset)
        t_inds, p_inds = mosaic.get_offset_array_indices(
            (t_ysize, t_xsize), (p_ysize, p_xsize), offset)
        print(t_inds, p_inds)
        ar_buf = np.full(ar_t.shape, p_nodata, dtype=ar_p.dtype)
        print ar_buf.shape
        ar_buf[t_inds[0]:t_inds[1],
               t_inds[2]:t_inds[3]] = ar_p[p_inds[0]:p_inds[1],
                                           p_inds[2]:p_inds[3]]
        ar_p = ar_buf.copy()
        del ar_buf
    mask = (ar_p == p_nodata) | (ar_t == t_nodata)

    samples = pd.read_csv(sample_txt, sep='\t', index_col='obs_id')
    print samples
    df_adj, df_smp = confusion_matrix_by_area(ar_p,
                                              ar_t,
                                              samples,
                                              p_nodata,
                                              t_nodata,
                                              mask=mask,
                                              bins=bins,
                                              out_txt=out_txt,
                                              target_col=target_col,
                                              match=match)

    ar_p = None
    ar_t = None
    mask = None

    accuracy = df_adj.ix['producer', 'user']
    kappa = df_adj.ix['producer', 'kappa']
    if inventory_txt and file_stamp:
        df_inv = pd.read_csv(inventory_txt, sep='\t', index_col='stamp')
        if file_stamp in df_inv.index and 'vote' in os.path.basename(out_dir):
            cols = ['vote_accuracy', 'vote_kappa']
            df_inv.ix[file_stamp, cols] = accuracy, kappa
            df_inv.to_csv(inventory_txt, sep='\t')
            print 'Vote scores written to inventory_txt: ', inventory_txt

        if file_stamp in df_inv.index and 'mean' in os.path.basename(out_dir):
            cols = ['mean_accuracy', 'mean_kappa']
            df_inv.ix[file_stamp, cols] = accuracy, kappa
            df_inv.to_csv(inventory_txt, sep='\t')

    return df_smp
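read_params is assumed to return a dict mapping parameter names to their (quoted) string values from the param file; the corrected exec loop above then materializes each entry as a local variable. A minimal standalone illustration of that pattern, using a hypothetical inputs dict:

inputs = {'p_nodata': '255', 't_nodata': '-9999'}   # hypothetical values
for i in inputs:
    exec("{0} = str({1})".format(i, inputs[i]))
# p_nodata and t_nodata now exist as the strings '255' and '-9999'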
Example #4
def main(in_raster,
         snap_raster,
         in_nodata,
         out_nodata,
         out_path=None,
         mask_val=None,
         overwrite=False):

    t0 = time.time()
    in_nodata = int(in_nodata)
    out_nodata = int(out_nodata)

    print '\nOpening datasets... '
    t1 = time.time()
    ds_in = gdal.Open(in_raster)
    ar_in = ds_in.ReadAsArray()
    tx_in = ds_in.GetGeoTransform()
    #driver = ds_in.GetDriver()
    ds_in = None

    ds_snap = gdal.Open(snap_raster)
    ar_snap = ds_snap.ReadAsArray()
    tx_snap = ds_snap.GetGeoTransform()
    prj = ds_snap.GetProjection()
    ds_snap = None
    print '%.1f seconds\n' % (time.time() - t1)

    print 'Snapping input raster...'
    t1 = time.time()
    offset = calc_offset((tx_snap[0], tx_snap[3]), tx_in)
    snap_inds, in_inds = get_offset_array_indices(ar_snap.shape, ar_in.shape,
                                                  offset)
    np_dtype = ar_in.dtype
    ar = np.full(ar_snap.shape, out_nodata, dtype=np_dtype)
    ar_in[ar_in == in_nodata] = out_nodata
    ar[snap_inds[0]:snap_inds[1],
       snap_inds[2]:snap_inds[3]] = ar_in[in_inds[0]:in_inds[1],
                                          in_inds[2]:in_inds[3]]

    if mask_val:
        mask_val = int(mask_val)
        ar[ar_snap == mask_val] = out_nodata

    print '%.1f seconds\n' % (time.time() - t1)

    if out_path:
        if ar.max() <= 255 and ar.min() >= 0:
            gdal_dtype = gdal.GDT_Byte
        else:
            gdal_dtype = gdal.GDT_Int16

        if os.path.exists(out_path) and not overwrite:
            sys.exit('out_path already exists')
        driver = get_gdal_driver(out_path)
        array_to_raster(ar, tx_snap, prj, driver, out_path, gdal_dtype,
                        out_nodata)

        # Write metadata
        desc = ('Input raster %s snapped to the extent of %s.') % (in_raster,
                                                                   snap_raster)
        if mask_val:
            desc += ' Data were masked from snap raster with value %s.' % mask_val
        createMetadata(sys.argv, out_path, description=desc)
    else:
        return ar

    print '\nTotal time to snap raster: %.1f seconds\n' % (time.time() - t0)
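A minimal sketch of calling this entry point directly; when out_path is omitted the snapped array is returned instead of written to disk. The paths and nodata values are placeholders, and any command-line wrapper the original script may provide is not shown.

ar_snapped = main('input.tif', 'snap_template.tif',
                  in_nodata=-9999, out_nodata=255)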
Example #5
def snap_by_tile(ds_in,
                 ds_snap,
                 tiles,
                 tx_snap,
                 tx_in,
                 in_nodata,
                 out_nodata,
                 out_dir,
                 mask_val=None):

    prj = ds_in.GetProjection()
    driver = gdal.GetDriverByName('gtiff')

    if mask_val is not None:
        mask_val = int(mask_val)

    row_off, col_off = calc_offset((tx_snap[0], tx_snap[3]), tx_in)
    in_size = ds_in.RasterYSize, ds_in.RasterXSize

    n_tiles = float(len(tiles))
    t1 = time.time()
    msg = '\rProcessing tile %d/%d (%.1f%%) || %.1f/~%.1f minutes'

    template = os.path.join(out_dir, 'tile_%s.pkl')
    mins = []
    maxs = []
    for i, (tile_id, coords) in enumerate(tiles.iterrows()):

        tile_off = row_off - coords.ul_r, col_off - coords.ul_c
        tile_size = coords.lr_r - coords.ul_r, coords.lr_c - coords.ul_c
        tile_inds, in_inds = get_offset_array_indices(tile_size, in_size,
                                                      tile_off)

        in_ulr, in_lrr, in_ulc, in_lrc = in_inds
        in_xsize = in_lrc - in_ulc
        in_ysize = in_lrr - in_ulr
        if in_xsize <= 0 or in_ysize <= 0:  # They don't overlap
            continue
        ar_in = ds_in.ReadAsArray(in_ulc, in_ulr, in_xsize, in_ysize)
        if np.all(ar_in == in_nodata):
            continue
        ar_out = np.full(tile_size, out_nodata, dtype=ar_in.dtype)
        ar_out[tile_inds[0]:tile_inds[1], tile_inds[2]:tile_inds[3]] = ar_in
        ar_out[ar_out == in_nodata] = out_nodata
        if mask_val is not None:
            mask = ds_snap.ReadAsArray(coords.ul_c, coords.ul_r, tile_size[1],
                                       tile_size[0]) == mask_val
            ar_out[mask] = out_nodata

        out_path = template % tile_id
        with open(out_path, 'wb') as f:
            pickle.dump(ar_out, f, protocol=-1)
        mins.append(ar_out.min())
        maxs.append(ar_out.max())
        tiles.loc[tile_id, 'file'] = out_path

        cum_time = (time.time() - t1) / 60.
        est_time = cum_time / (i + 1) * (n_tiles - i)  # estimate remaining time
        sys.stdout.write(msg % (i + 1, n_tiles,
                                (i + 1) / n_tiles * 100, cum_time, est_time))
        sys.stdout.flush()
        '''ulx, xres, _, uly, _, yres = tx_snap
        tx = coords.ul_c * xres + ulx, xres, 0, coords.ul_r * yres + uly, 0, yres
        array_to_raster(ar_out, tx, prj, driver, '/home/server/pi/homes/shooper/delete/tile_%s.tif' % tile_id, gdal.GDT_Int16, out_nodata)'''

    dtype = get_min_numpy_dtype(np.array(mins + maxs))

    return dtype
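snap_by_tile expects tiles to be a DataFrame of per-tile pixel bounds (columns ul_r, ul_c, lr_r, lr_c) indexed by tile ID, and it records the pickle written for each tile in a 'file' column. A minimal sketch of building such a frame and calling the function; the tile size, paths, and nodata values are hypothetical.

import pandas as pd
from osgeo import gdal

# Hypothetical 2 x 2 tiling of a 2000 x 2000 snap raster (1000-pixel tiles)
tiles = pd.DataFrame([{'ul_r': r, 'ul_c': c, 'lr_r': r + 1000, 'lr_c': c + 1000}
                      for r in (0, 1000) for c in (0, 1000)])
tiles['file'] = ''

ds_in = gdal.Open('input.tif')    # placeholder paths
ds_snap = gdal.Open('snap.tif')
dtype = snap_by_tile(ds_in, ds_snap, tiles,
                     ds_snap.GetGeoTransform(), ds_in.GetGeoTransform(),
                     in_nodata=-9999, out_nodata=255,
                     out_dir='/tmp/snap_tiles')   # out_dir is assumed to exist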