Code Example #1
def get_overlap_image_index(polygons,image_boxes,min_overlap_area=1):
    '''
    get the indices of the images that the polygons overlap
    :param polygons: a list of polygons
    :param image_boxes: the extents of all the images
    :param min_overlap_area: minimum area for checking the overlap; ignore an image if the overlap is too small
    :return:
    '''

    # find the images that the polygons overlap (one or two images)
    img_idx = []
    # for a_poly in polygons:
    #     a_poly_json = mapping(a_poly)
    #     polygon_box = rasterio.features.bounds(a_poly_json)
    polygon_box = get_bounds_of_polygons(polygons)
    for idx, img_box in enumerate(image_boxes):
        if rasterio.coords.disjoint_bounds(img_box, polygon_box) is False:
            if idx not in img_idx:
                img_idx.append(idx)

    # check the overlap; iterate over a copy because we may remove indices from img_idx
    for idx in list(img_idx):
        box_poly = convert_image_bound_to_shapely_polygon(image_boxes[idx])
        poly_index = get_poly_index_within_extent(polygons, box_poly, min_overlap_area=min_overlap_area)
        # if there is no overlap, remove the index
        if len(poly_index) < 1:
            img_idx.remove(idx)

    return img_idx
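Every example on this page ultimately calls vector_gpd.get_poly_index_within_extent. The helper itself is not listed here; below is a minimal sketch of what it plausibly does, assuming shapely geometries and the min_overlap_area semantics seen above. It is an illustration, not the library's actual source.

# Hypothetical sketch of vector_gpd.get_poly_index_within_extent:
# return the indices of polygons whose intersection with extent_poly
# is non-empty and larger than min_overlap_area.
def get_poly_index_within_extent(polygons, extent_poly, min_overlap_area=1):
    index_list = []
    for idx, poly in enumerate(polygons):
        inter = extent_poly.intersection(poly)
        if not inter.is_empty and inter.area > min_overlap_area:
            index_list.append(idx)
    return index_list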
Code Example #2
def produce_dem_diff_grids(grid_polys, grid_ids, pre_name, reg_tifs, b_apply_matchtag, b_mosaic_id, b_mosaic_date, keep_dem_percent, o_res, process_num=4):

    dem_ext_polys = get_dem_tif_ext_polygons(reg_tifs)
    dem_diff_tifs = []
    # mosaic and crop
    if os.path.isfile(grid_dem_diff_less2dem_txt):
        grid_id_less2dem_list = [int(item) for item in io_function.read_list_from_txt(grid_dem_diff_less2dem_txt)]
    else:
        grid_id_less2dem_list = []
    for grid_id, grid_poly in zip(grid_ids, grid_polys):

        if grid_id in grid_id_less2dem_list:
            basic.outputlogMessage('skip grid %d, previous processing shows that the DEM count is smaller than 2' % grid_id)
            continue

        save_dir = 'grid_%d_tmp_files'%grid_id

        # check free disk space
        work_dir = './'
        free_GB = io_function.get_free_disk_space_GB(work_dir)
        total_wait_time = 0
        while free_GB < 50 and total_wait_time < 60*60*12:
            basic.outputlogMessage('The free disk space (%.4f GB) is less than 50 GB, wait 60 seconds' % free_GB)
            time.sleep(60)
            total_wait_time += 60
            free_GB = io_function.get_free_disk_space_GB(work_dir)


        # get subset of tifs
        dem_poly_index = vector_gpd.get_poly_index_within_extent(dem_ext_polys, grid_poly)
        if len(dem_poly_index) < 1:
            basic.outputlogMessage('warning, no dem tifs within %d grid, skip' % grid_id)
            save_id_grid_no_valid_dem(grid_id)
            continue
        dem_list_sub = [reg_tifs[index] for index in dem_poly_index]

        # filter by month: cancelled, because it removed too much good data
        # dem_list_sub = filter_dem_by_month(dem_list_sub)

        mosaic_tif_list = mosaic_crop_dem(dem_list_sub, save_dir, grid_id, grid_poly, b_mosaic_id, b_mosaic_date,
                                          process_num, keep_dem_percent, o_res, pre_name, resample_method='average',
                                          b_mask_matchtag=b_apply_matchtag,
                                          b_mask_stripDEM_outlier=b_mask_stripDEM_outlier,
                                          b_mask_surface_water=b_mask_surface_water,
                                          b_mosaic_year=b_mosaic_year)


        # dem co-registration (cancelled: the result is not good with the default settings)

        # dem differencing
        save_dem_diff = os.path.join(grid_dem_diffs_dir, pre_name + '_DEM_diff_grid%d.tif'%grid_id)
        save_date_diff = os.path.join(grid_dem_diffs_dir, pre_name + '_date_diff_grid%d.tif'%grid_id)

        if dem_diff_newest_oldest(mosaic_tif_list, save_dem_diff, save_date_diff, process_num,
                                  b_max_subsidence=b_max_subsidence, b_save_cm=True):
            dem_diff_tifs.append(save_dem_diff)
        else:
            save_id_grid_dem_less_2(grid_id)
            grid_id_less2dem_list.append(grid_id)
    return dem_diff_tifs
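The disk-space guard in this and several later examples only needs the standard library. A hedged sketch of what io_function.get_free_disk_space_GB presumably wraps (the real helper may differ):

import shutil

# Hypothetical equivalent of io_function.get_free_disk_space_GB:
# free bytes on the filesystem that contains 'path', converted to GB.
def get_free_disk_space_GB(path):
    return shutil.disk_usage(path).free / (1024 ** 3)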
Code Example #3
def produce_matchtag_sum_grids(grid_polys,
                               grid_ids,
                               pre_name,
                               matchtag_tifs,
                               o_res,
                               process_num=4):

    dem_ext_polys = get_dem_tif_ext_polygons(matchtag_tifs)
    matchtag_sum_tifs = []
    # mosaic and crop
    for grid_id, grid_poly in zip(grid_ids, grid_polys):

        save_dir = 'grid_%d_tmp_files' % grid_id

        # check free disk space
        work_dir = './'
        free_GB = io_function.get_free_disk_space_GB(work_dir)
        total_wait_time = 0
        while free_GB < 50 and total_wait_time < 60 * 60 * 12:
            basic.outputlogMessage(
                'The free disk space (%.4f GB) is less than 50 GB, wait 60 seconds'
                % free_GB)
            time.sleep(60)
            total_wait_time += 60
            free_GB = io_function.get_free_disk_space_GB(work_dir)

        # get subset of tifs
        dem_poly_index = vector_gpd.get_poly_index_within_extent(
            dem_ext_polys, grid_poly)
        if len(dem_poly_index) < 1:
            basic.outputlogMessage(
                'warning, no dem tifs within %d grid, skip' % grid_id)
            continue
        dem_list_sub = [matchtag_tifs[index] for index in dem_poly_index]

        mosaic_tif_list = mosaic_crop_dem(dem_list_sub,
                                          save_dir,
                                          grid_id,
                                          grid_poly,
                                          False,
                                          False,
                                          process_num,
                                          0,
                                          o_res,
                                          pre_name,
                                          resample_method='average')

        # sum matchtag
        save_matchtag_sum = os.path.join(
            grid_matchtag_sum_dir,
            pre_name + '_count%d' % len(mosaic_tif_list) +
            '_matchtag_sum_grid%d.tif' % grid_id)

        if sum_matchtag(mosaic_tif_list, save_matchtag_sum):
            matchtag_sum_tifs.append(save_matchtag_sum)

    return matchtag_sum_tifs
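This example relies on sum_matchtag, which is not listed on this page. A minimal sketch of the likely behavior, assuming all cropped matchtag rasters share the same grid, extent, and size (a hypothetical reimplementation, not the project's code):

import numpy as np
import rasterio

# Hypothetical sketch of sum_matchtag: pixel-wise sum of co-registered
# matchtag rasters, written out as an int32 count raster.
def sum_matchtag(matchtag_tifs, save_path):
    if len(matchtag_tifs) < 1:
        return False
    with rasterio.open(matchtag_tifs[0]) as src:
        profile = src.profile
        total = src.read(1).astype(np.int32)
    for tif in matchtag_tifs[1:]:
        with rasterio.open(tif) as src:
            total += src.read(1).astype(np.int32)
    profile.update(dtype='int32', nodata=None)
    with rasterio.open(save_path, 'w', **profile) as dst:
        dst.write(total, 1)
    return True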
Code Example #4
def get_tar_list_sub(tar_dir, dem_polygons, dem_urls, extent_poly):

    dem_poly_ids = vector_gpd.get_poly_index_within_extent(
        dem_polygons, extent_poly)
    urls = [dem_urls[id] for id in dem_poly_ids]

    new_tar_list = []
    for ii, url in enumerate(urls):
        tmp = urlparse(url)
        filename = os.path.basename(tmp.path)
        save_dem_path = os.path.join(tar_dir, filename)
        if os.path.isfile(save_dem_path):
            new_tar_list.append(save_dem_path)
        else:
            basic.outputlogMessage(
                'Warning, %s not in %s, may need to download it first' %
                (filename, tar_dir))

    return new_tar_list
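A quick illustration of the filename derivation used above; the URL is made up for the example:

import os
from urllib.parse import urlparse

# Hypothetical URL, for illustration only
url = 'https://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/sample/SETSM_sample_2m_v3.0.tar.gz'
filename = os.path.basename(urlparse(url).path)
print(filename)  # SETSM_sample_2m_v3.0.tar.gz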
Code Example #5
def zonal_stats_one_polygon(idx, polygon, image_tiles, img_tile_polygons, stats, nodata=None, range=None,
                            band=1, all_touched=True):

    overlap_index = vector_gpd.get_poly_index_within_extent(img_tile_polygons, polygon)
    image_list = [image_tiles[item] for item in overlap_index]

    if len(image_list) == 1:
        out_image, out_tran, nodata = raster_io.read_raster_in_polygons_mask(image_list[0], polygon, nodata=nodata,
                                                                             all_touched=all_touched, bands=band)
    elif len(image_list) > 1:
        # for the case where the polygon overlaps more than one raster, we need to produce a mosaic
        tmp_saved_files = []
        for k_img, image_path in enumerate(image_list):

            # print(image_path)
            tmp_save_path = os.path.splitext(os.path.basename(image_path))[0] + '_subset_poly%d'%idx +'.tif'
            _, _, nodata = raster_io.read_raster_in_polygons_mask(image_path, polygon, all_touched=all_touched, nodata=nodata,
                                                                  bands=band, save_path=tmp_save_path)
            tmp_saved_files.append(tmp_save_path)

        # mosaic files in tmp_saved_files
        save_path = 'raster_for_poly%d.tif'%idx
        mosaic_args_list = ['gdal_merge.py', '-o', save_path, '-n', str(nodata), '-a_nodata', str(nodata)]
        mosaic_args_list.extend(tmp_saved_files)
        if basic.exec_command_args_list_one_file(mosaic_args_list, save_path) is False:
            raise IOError('error, obtaining a mosaic (%s) failed' % save_path)

        # read the raster
        out_image, out_nodata = raster_io.read_raster_one_band_np(save_path,band=band)
        # remove the temporary rasters
        tmp_saved_files.append(save_path)
        for item in tmp_saved_files:
            io_function.delete_file_or_dir(item)

    else:
        basic.outputlogMessage('warning, cannot find a raster for polygon %d (index starts at 0)' % idx)
        return None

    # do calculation
    return array_stats(out_image, stats, nodata,range=range)
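The final call goes to array_stats, which is not listed here. A hedged sketch of what it might look like, keeping the caller's nodata and range semantics (hypothetical; the range parameter deliberately matches the keyword used above even though it shadows the builtin):

import numpy as np

# Hypothetical sketch of array_stats: compute the requested statistics over
# valid pixels, excluding nodata and values outside an optional (min, max) range.
def array_stats(image, stats, nodata, range=None):
    data = np.asarray(image, dtype=np.float64).ravel()
    if nodata is not None:
        data = data[data != nodata]
    if range is not None:
        data = data[(data >= range[0]) & (data <= range[1])]
    funcs = {'mean': np.mean, 'std': np.std, 'min': np.min,
             'max': np.max, 'median': np.median, 'count': np.size}
    return {name: (funcs[name](data) if data.size > 0 else None) for name in stats}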
Code Example #6
File: dem_mosaic_crop.py, Project: yghlc/rs_data_proc
def main(options, args):

    save_dir = options.save_dir
    extent_shp = options.extent_shp
    process_num = options.process_num
    o_res = options.out_res
    b_mosaic_id = options.create_mosaic_id
    b_mosaic_date = options.create_mosaic_date
    keep_dem_percent = options.keep_dem_percent

    dem_dir_or_txt = args[0]
    if os.path.isfile(dem_dir_or_txt):
        dem_list = io_function.read_list_from_txt(dem_dir_or_txt)
    else:
        dem_list = io_function.get_file_list_by_ext('.tif', dem_dir_or_txt, bsub_folder=False)
        dem_list = [ tif for tif in dem_list if 'matchtag' not in tif ] # remove matchtag
    dem_count = len(dem_list)
    if dem_count < 1:
        raise ValueError('No input dem files in %s' % dem_dir_or_txt)

    resample_method = 'average'


    if extent_shp is None:
        # group DEMs that share the same strip pair ID
        dem_groups = group_demTif_strip_pair_ID(dem_list)
        # mosaic them directly, without considering the extent
        mosaic_dir = os.path.join(save_dir, 'dem_stripID_mosaic' )
        mosaic_dem_same_stripID(dem_groups, mosaic_dir, resample_method, process_num=process_num, save_source=True,
                                o_format='GTiff')
    else:
        extent_shp_base = os.path.splitext(os.path.basename(extent_shp))[0]
        dem_prj = map_projection.get_raster_or_vector_srs_info_epsg(dem_list[0])
        extent_prj = map_projection.get_raster_or_vector_srs_info_epsg(extent_shp)

        # # check projection (time-consuming if there are many tif files)
        # for dem_tif in dem_list:
        #     prj = map_projection.get_raster_or_vector_srs_info_epsg(dem_tif)
        #     if dem_prj != prj:
        #         raise ValueError('The projection inconsistent among dems (%s is different)'%dem_tif)

        dem_ext_polys = get_dem_tif_ext_polygons(dem_list)

        if extent_prj == dem_prj:
            extent_polys = vector_gpd.read_polygons_gpd(extent_shp)
        else:
            extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, dem_prj)

        if len(extent_polys) < 1:
            raise ValueError('No polygons in %s' % extent_shp)
        else:
            basic.outputlogMessage('%d extent polygons in %s' % (len(extent_polys), extent_shp))

        extPolys_ids = vector_gpd.read_attribute_values_list(extent_shp, 'id')
        if extPolys_ids is None or None in extPolys_ids:
            basic.outputlogMessage('Warning, field "id" is not in %s, will create a default ID for each grid' % extent_shp)
            extPolys_ids = [id + 1 for id in range(len(extent_polys))]

        for idx, ext_poly in zip(extPolys_ids, extent_polys):
            basic.outputlogMessage('mosaic and crop DEM for the %d th extent (%d in total)' % (idx, len(extent_polys)))
            # get subset of DEM
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(dem_ext_polys, ext_poly)
            if len(dem_poly_ids) < 1:
                basic.outputlogMessage('no dem tifs within the %d th polygon' % idx)
                continue
            dem_list_sub = [dem_list[id] for id in dem_poly_ids]

            mosaic_crop_dem(dem_list_sub, save_dir, idx, ext_poly, b_mosaic_id, b_mosaic_date,
                            process_num, keep_dem_percent, o_res, extent_shp_base, resample_method='average')
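This example reads extent polygons with read_polygons_gpd / read_shape_gpd_to_NewPrj. Hedged geopandas-based sketches of those two helpers (the real vector_gpd implementations may differ):

import geopandas as gpd

# Hypothetical sketch: read all geometries from a vector file.
def read_polygons_gpd(shp_path):
    return gpd.read_file(shp_path).geometry.to_list()

# Hypothetical sketch: read geometries, reprojected to a target CRS
# (e.g. an EPSG string such as 'EPSG:3413').
def read_shape_gpd_to_NewPrj(shp_path, prj):
    return gpd.read_file(shp_path).to_crs(prj).geometry.to_list()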
Code Example #7
File: dem_mosaic_crop.py, Project: yghlc/rs_data_proc
def mask_dem_by_surface_water(crop_dem_list, extent_poly, extent_id, crop_tif_dir, o_res, process_num):

    # get the list of surface water mask tiles
    water_mask_tifs = io_function.get_file_list_by_ext('.tif',mask_water_dir,bsub_folder=False)
    water_mask_ext_polys = get_dem_tif_ext_polygons(water_mask_tifs)

    overlap_index = vector_gpd.get_poly_index_within_extent(water_mask_ext_polys,extent_poly)

    #### crop and mosaic water mask
    sub_mosaic_dem_tifs = [water_mask_tifs[item] for item in overlap_index]
    water_mask_crop_tif_list = []
    for tif in sub_mosaic_dem_tifs:
        save_crop_path = os.path.join(crop_tif_dir, os.path.basename(io_function.get_name_by_adding_tail(tif, 'sub_poly_%d' % extent_id)) )
        if os.path.isfile(save_crop_path):
            basic.outputlogMessage('%s exists, skip' % save_crop_path)
            water_mask_crop_tif_list.append(save_crop_path)
        else:
            crop_tif = subset_image_by_polygon_box(tif, save_crop_path, extent_poly, resample_m='near',
                            o_format='VRT', out_res=o_res, same_extent=True, thread_num=process_num)
            if crop_tif is False:
                raise ValueError('warning, crop %s failed' % tif)
            water_mask_crop_tif_list.append(crop_tif)
    if len(water_mask_crop_tif_list) < 1:
        basic.outputlogMessage('No water mask for %d grid'%extent_id)
        save_id_grid_no_watermask(extent_id)
        return None

    # create a mosaic; this can also handle a single input file, but it is slow
    save_water_mask_mosaic = os.path.join(crop_tif_dir, 'global_surface_water_grid%d.tif'%extent_id)
    result = RSImageProcess.mosaic_crop_images_gdalwarp(water_mask_crop_tif_list, save_water_mask_mosaic, resampling_method='average',o_format='GTiff',
                                               compress='lzw', tiled='yes', bigtiff='if_safer',thread_num=process_num)
    if result is False:
        return False

    # because the resolutions of the DEM and the water mask differ, polygonize the water mask, then burn it into the DEM
    water_mask_shp = os.path.join(crop_tif_dir, 'global_surface_water_grid%d.shp'%extent_id)
    if os.path.isfile(water_mask_shp):
        basic.outputlogMessage('%s exists, skip polygonizing' % water_mask_shp)
    else:
        # set 0 as nodata
        if raster_io.set_nodata_to_raster_metadata(save_water_mask_mosaic,0) is False:
            return False
        if vector_gpd.raster2shapefile(save_water_mask_mosaic,water_mask_shp,connect8=True) is None:
            return False

    # masking the strip version of DEMs
    mask_dem_list = []
    for idx, strip_dem in enumerate(crop_dem_list):
        save_path = io_function.get_name_by_adding_tail(strip_dem, 'maskWater')
        if os.path.isfile(save_path):
            basic.outputlogMessage('%s exists, skip' % save_path)
            mask_dem_list.append(save_path)
            continue

        io_function.copy_file_to_dst(strip_dem,save_path,overwrite=True)
        nodata = raster_io.get_nodata(save_path)
        if raster_io.burn_polygon_to_raster_oneband(save_path,water_mask_shp,nodata) is False:
            continue
        mask_dem_list.append(save_path)

    return mask_dem_list
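The masking step depends on raster_io.burn_polygon_to_raster_oneband. One plausible way to implement it is via the gdal_rasterize command-line tool; a hedged sketch, not the project's actual implementation:

import subprocess

# Hypothetical sketch: burn 'burn_value' into band 1 of 'raster_path'
# wherever a polygon from 'shp_path' covers it (gdal_rasterize edits in place).
def burn_polygon_to_raster_oneband(raster_path, shp_path, burn_value):
    cmd = ['gdal_rasterize', '-b', '1', '-burn', str(burn_value), shp_path, raster_path]
    return subprocess.run(cmd).returncode == 0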
Code Example #8
File: dem_mosaic_crop.py, Project: yghlc/rs_data_proc
def mask_strip_dem_outlier_by_ArcticDEM_mosaic(crop_strip_dem_list, extent_poly, extent_id, crop_tif_dir, o_res, process_num):

    # get list of the ArcticDEM mosaic
    arcticDEM_mosaic_reg_tifs = io_function.get_file_list_by_ext('.tif',arcticDEM_tile_reg_tif_dir,bsub_folder=False)
    mosaic_dem_ext_polys = get_dem_tif_ext_polygons(arcticDEM_mosaic_reg_tifs)

    overlap_index = vector_gpd.get_poly_index_within_extent(mosaic_dem_ext_polys,extent_poly)

    #### crop and mosaic mosaic_reg_tifs
    sub_mosaic_dem_tifs = [arcticDEM_mosaic_reg_tifs[item] for item in overlap_index]
    mosaic_crop_tif_list = []
    for tif in sub_mosaic_dem_tifs:
        save_crop_path = os.path.join(crop_tif_dir, os.path.basename(io_function.get_name_by_adding_tail(tif, 'sub_poly_%d' % extent_id)) )
        if os.path.isfile(save_crop_path):
            basic.outputlogMessage('%s exists, skip cropping' % save_crop_path)
            mosaic_crop_tif_list.append(save_crop_path)
        else:
            crop_tif = subset_image_by_polygon_box(tif, save_crop_path, extent_poly, resample_m='near',
                            o_format='VRT', out_res=o_res,same_extent=True,thread_num=process_num)
            if crop_tif is False:
                raise ValueError('warning, crop %s failed' % tif)
            mosaic_crop_tif_list.append(crop_tif)
    if len(mosaic_crop_tif_list) < 1:
        basic.outputlogMessage('No mosaic version of ArcticDEM for %d grid, skip mask_strip_dem_outlier_by_ArcticDEM_mosaic'%extent_id)
        return False

    # create a mosaic; this can also handle a single input file, but it is slow
    save_dem_mosaic = os.path.join(crop_tif_dir, 'ArcticDEM_tiles_grid%d.tif'%extent_id)
    result = RSImageProcess.mosaic_crop_images_gdalwarp(mosaic_crop_tif_list, save_dem_mosaic, resampling_method='average',o_format='GTiff',
                                               compress='lzw', tiled='yes', bigtiff='if_safer',thread_num=process_num)
    if result is False:
        return False

    height_tileDEM, width_tileDEM, count_tileDEM, dtype_tileDEM = raster_io.get_height_width_bandnum_dtype(save_dem_mosaic)
    tileDEM_data, tileDEM_nodata = raster_io.read_raster_one_band_np(save_dem_mosaic)
    # masking the strip version of DEMs
    mask_strip_dem_list = []
    for idx, strip_dem in enumerate(crop_strip_dem_list):
        save_path = io_function.get_name_by_adding_tail(strip_dem, 'maskOutlier')
        if os.path.isfile(save_path):
            basic.outputlogMessage('%s exists, skip' % save_path)
            mask_strip_dem_list.append(save_path)
            continue

        # check band count, width, and height
        height, width, count, dtype = raster_io.get_height_width_bandnum_dtype(strip_dem)
        if height_tileDEM != height or width_tileDEM != width or count_tileDEM != count:
            raise ValueError('size different between %s and %s' % (strip_dem, save_dem_mosaic))
        if count != 1:
            raise ValueError('DEM and Matchtag should only have one band')

        try:
            dem_data, nodata = raster_io.read_raster_one_band_np(strip_dem)
        except Exception:
            basic.outputlogMessage('invalid tif file: %s' % strip_dem)
            continue

        nodata_loc = np.where(dem_data == nodata)

        diff = dem_data - tileDEM_data
        # mask pixels as nodata where the absolute difference exceeds 50 m
        dem_data[np.abs(diff) > 50] = nodata
        dem_data[nodata_loc] = nodata   # the subtraction may alter nodata pixels; set them back
        # save to file
        raster_io.save_numpy_array_to_rasterfile(dem_data, save_path, strip_dem, compress='lzw', tiled='yes',
                                                 bigtiff='if_safer')
        mask_strip_dem_list.append(save_path)

    return mask_strip_dem_list
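The read/save pair used above maps naturally onto rasterio. Hedged sketches of the two raster_io helpers, assuming the reference tif supplies the georeferencing profile (hypothetical reimplementations):

import rasterio

# Hypothetical sketch: read one band as a numpy array plus its nodata value.
def read_raster_one_band_np(tif_path, band=1):
    with rasterio.open(tif_path) as src:
        return src.read(band), src.nodata

# Hypothetical sketch: save a 2D array using the georeferencing of ref_tif;
# extra keywords (compress, tiled, bigtiff, ...) pass through as GDAL options.
def save_numpy_array_to_rasterfile(array, save_path, ref_tif, **creation_opts):
    with rasterio.open(ref_tif) as ref:
        profile = ref.profile
    profile.update(count=1, dtype=array.dtype.name, **creation_opts)
    with rasterio.open(save_path, 'w', **profile) as dst:
        dst.write(array, 1)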
Code Example #9
def get_grid_20(extent_shp_or_id_txt, grid_polys, ids):
    '''
    get grid polygons and ids based on an input extent (polygon in a shapefile) or ids (txt file)
    if "file_name_base + '_grid_ids.txt'" exists, the ids are read from that file directly.
    :param extent_shp_or_id_txt:
    :param grid_polys:
    :param ids:
    :return:
    '''

    io_function.is_file_exist(extent_shp_or_id_txt)

    if extent_shp_or_id_txt.endswith('.txt'):
        grid_ids = io_function.read_list_from_txt(extent_shp_or_id_txt)
        grid_ids = [int(item) for item in grid_ids ]
    else:
        shp_corresponding_grid_ids_txt = get_corresponding_grid_ids_txt(extent_shp_or_id_txt)
        if os.path.isfile(shp_corresponding_grid_ids_txt):
            print('corresponding grid ids txt file for %s exists, read grid id from txt'%extent_shp_or_id_txt)
            grid_ids = [ int(item) for item in io_function.read_list_from_txt(shp_corresponding_grid_ids_txt)]
            basic.outputlogMessage('read %d grids within the extents (%s)'
                                   % (len(grid_ids), os.path.basename(extent_shp_or_id_txt)))
        else:
            # extent polygons and projection (proj4)
            extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp_or_id_txt)
            if extent_shp_prj == '':
                raise ValueError('get proj4 of %s failed'%extent_shp_or_id_txt)
            grid_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)
            if grid_shp_prj == '':
                raise ValueError('get proj4 of %s failed' % grid_20_shp)

            if extent_shp_prj != grid_shp_prj:
                basic.outputlogMessage('%s and %s do not have the same projection, will reproject %s'
                                       % (extent_shp_or_id_txt, grid_20_shp, os.path.basename(extent_shp_or_id_txt)))
                epsg = map_projection.get_raster_or_vector_srs_info_epsg(grid_20_shp)
                # print(epsg)
                # extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp,dem_shp_prj.strip())
                extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp_or_id_txt, epsg)
            else:
                extent_polys = vector_gpd.read_polygons_gpd(extent_shp_or_id_txt)

            ext_poly_count = len(extent_polys)
            if ext_poly_count < 1:
                raise ValueError('No polygons in %s'%extent_shp_or_id_txt)
            grid_index = []
            # if there are many polygons, this will take time.
            for idx,ext_poly in enumerate(extent_polys):
                print(timeTools.get_now_time_str(), 'get grids for extent idx', idx, 'total polygons:',ext_poly_count)
                index = vector_gpd.get_poly_index_within_extent(grid_polys, ext_poly)
                grid_index.extend(index)
            grid_index = list(set(grid_index))  # remove duplicated ids
            basic.outputlogMessage('find %d grids within the extents (%s)' % (len(grid_index), os.path.basename(extent_shp_or_id_txt)) )

            grid_ids = [ ids[idx] for idx in grid_index]
            grid_ids_str = [str(item) for item in grid_ids ]
            io_function.save_list_to_txt(shp_corresponding_grid_ids_txt,grid_ids_str)

    id_index = [ids.index(id) for id in grid_ids]
    selected_grid_polys = [grid_polys[idx] for idx in id_index ]

    return selected_grid_polys, grid_ids
Code Example #10
def extract_headwall_grids(grid_polys,
                           grid_ids,
                           pre_name,
                           reg_tifs,
                           b_mosaic_id,
                           b_mosaic_date,
                           keep_dem_percent,
                           o_res,
                           process_num=1):

    proc = psutil.Process(os.getpid())
    dem_ext_polys = get_dem_tif_ext_polygons(reg_tifs)
    headwall_shp_folders = []
    # mosaic and crop
    for grid_id, grid_poly in zip(grid_ids, grid_polys):

        save_dir = 'grid_%d_tmp_files' % grid_id

        # check free disk space
        work_dir = './'
        free_GB = io_function.get_free_disk_space_GB(work_dir)
        total_wait_time = 0
        while free_GB < 50 and total_wait_time < 60 * 60 * 12:
            basic.outputlogMessage(
                'The free disk space (%.4f GB) is less than 50 GB, wait 60 seconds'
                % free_GB)
            time.sleep(60)
            total_wait_time += 60
            free_GB = io_function.get_free_disk_space_GB(work_dir)

        # get subset of tifs
        dem_poly_index = vector_gpd.get_poly_index_within_extent(
            dem_ext_polys, grid_poly)
        if len(dem_poly_index) < 1:
            basic.outputlogMessage(
                'warning, no dem tifs within %d grid, skip' % grid_id)
            save_id_grid_no_valid_dem(grid_id)
            continue
        dem_list_sub = [reg_tifs[index] for index in dem_poly_index]

        mosaic_tif_list = mosaic_crop_dem(
            dem_list_sub,
            save_dir,
            grid_id,
            grid_poly,
            b_mosaic_id,
            b_mosaic_date,
            process_num,
            keep_dem_percent,
            o_res,
            pre_name,
            resample_method='average',
            b_mask_matchtag=b_apply_matchtag,
            b_mask_stripDEM_outlier=b_mask_stripDEM_outlier,
            b_mask_surface_water=b_mask_surface_water,
            b_mosaic_year=b_mosaic_year)

        if len(mosaic_tif_list) < 1:
            basic.outputlogMessage(
                'warning, failed to get DEM mosaic for grid %d' % grid_id)
            continue
        # dem co-registration (cancelled: the result is not good with the default settings)

        # to slope
        slope_tifs = dem_list_to_slope_list(mosaic_tif_list,
                                            save_dir,
                                            grid_id,
                                            process_num=process_num)

        # extract headwall
        multi_headwall_shp_dir = os.path.join(save_dir,
                                              'headwall_shp_sub_%d' % grid_id)
        if os.path.isdir(multi_headwall_shp_dir) is False:
            io_function.mkdir(multi_headwall_shp_dir)
        for idx, slope in enumerate(slope_tifs):
            working_dir = os.path.join(
                save_dir,
                os.path.splitext(os.path.basename(slope))[0])
            if os.path.isdir(working_dir) is False:
                io_function.mkdir(working_dir)
            # use polygon based medial axis
            # if extract_headwall_from_slope(idx, len(slope_tifs), slope, working_dir, multi_headwall_shp_dir, min_slope, min_size,
            #                                max_size, max_axis_width, max_box_WH, process_num) is False:
            #     basic.outputlogMessage('extract headwall from %s failed'%slope)

            # use raster based medial axis
            if extract_headwall_based_medial_axis_from_slope(
                    idx, len(slope_tifs), slope, working_dir,
                    multi_headwall_shp_dir, min_slope, min_size, max_size,
                    min_length, max_length, max_hole_count, max_axis_width,
                    process_num) is False:
                basic.outputlogMessage('extract headwall from %s failed' %
                                       slope)

        headwall_shp_list = io_function.get_file_list_by_ext(
            '.shp', multi_headwall_shp_dir, bsub_folder=False)
        if len(headwall_shp_list) < 1:
            basic.outputlogMessage('Warning, no headwall shapefile in %s' %
                                   multi_headwall_shp_dir)
            save_id_grid_no_headwall(grid_id)
            continue

        # merge headwall detected on different dates.
        save_headwall_folder = os.path.join(grid_dem_headwall_shp_dir,
                                            'headwall_shps_grid%d' % grid_id)
        if os.path.isdir(save_headwall_folder) is False:
            io_function.mkdir(save_headwall_folder)

        print('before merge_multi_headwall_shp_to_one, used memory:',
              proc.memory_info()[0] / (1024 * 1024 * 1024.0), 'GB')
        save_merged_shp = os.path.join(
            save_headwall_folder, 'headwall_shp_multiDates_%d.shp' % grid_id)
        if merge_multi_headwall_shp_to_one(headwall_shp_list,
                                           save_merged_shp) is False:
            continue

        # have not found a good method to merge them; just copy all of them for now
        # res = os.system('cp -r %s %s'%(multi_headwall_shp_dir,save_headwall_folder))
        # if res != 0:
        #     basic.outputlogMessage('Copy %s failed'%multi_headwall_shp_dir)
        #     continue

        headwall_shp_folders.append(save_headwall_folder)

    return headwall_shp_folders
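dem_list_to_slope_list is not shown on this page. If it wraps GDAL, it could look roughly like the following sketch using the gdaldem CLI (hypothetical; the project's version and its options may differ):

import os
import subprocess

# Hypothetical sketch: derive a slope raster (in degrees) for each DEM mosaic.
def dem_list_to_slope_list(dem_tifs, save_dir, grid_id, process_num=1):
    slope_tifs = []
    for dem in dem_tifs:
        name = os.path.splitext(os.path.basename(dem))[0] + '_slope.tif'
        slope_tif = os.path.join(save_dir, name)
        if not os.path.isfile(slope_tif):
            subprocess.run(['gdaldem', 'slope', dem, slope_tif,
                            '-co', 'compress=lzw'], check=True)
        slope_tifs.append(slope_tif)
    return slope_tifs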
Code Example #11
def download_dem_tarball(dem_index_shp,
                         extent_polys,
                         save_folder,
                         pre_name,
                         reg_tif_dir=None,
                         poly_ids=None,
                         b_arcticDEM_tile=False):
    # read dem polygons and url
    dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list(
        dem_index_shp, 'fileurl', b_fix_invalid_polygon=False)

    basic.outputlogMessage('%d dem polygons in %s' %
                           (len(dem_polygons), dem_index_shp))

    dem_tar_ball_list = []
    reg_tifs_list = []
    curr_dir = os.getcwd()
    b_save_grid_id_noDEM = True
    if poly_ids is None:
        poly_ids = [idx for idx in range(len(extent_polys))]
        b_save_grid_id_noDEM = False  # if poly_ids is not the globally unique id, then don't save it

    if os.path.isfile('no_registration_strips.txt'):
        no_registration_strips = io_function.read_list_from_txt(
            'no_registration_strips.txt')
    else:
        no_registration_strips = []

    # tarballs currently being downloaded
    downloading_tarballs = []

    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage('get data for the %d th extent (%d/%d)' %
                               (idx, count, len(extent_polys)))

        save_txt_path = pre_name + '_dem_urls_poly_%d.txt' % idx
        if os.path.isfile(save_txt_path):
            urls = io_function.read_list_from_txt(save_txt_path)
            basic.outputlogMessage('read %d dem urls from %s' %
                                   (len(urls), save_txt_path))
        else:
            # get fileurl
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(
                dem_polygons, ext_poly)
            basic.outputlogMessage('find %d DEM within %d th extent' %
                                   (len(dem_poly_ids), (idx)))
            urls = [dem_urls[id] for id in dem_poly_ids]

            # save to txt
            io_function.save_list_to_txt(save_txt_path, urls)
            basic.outputlogMessage('save dem urls to %s' % save_txt_path)

        if len(urls) > 0:

            # total_size_GB = get_total_size(urls)  # internet access, parallel running may cause problem. The info is not important
            # basic.outputlogMessage('the size of files will be downloaded is %.4lf GB for the %d th extent '%(total_size_GB,(idx+1)))
            # time.sleep(5)   # wait 5 seconds

            # download them using wget one by one
            for ii, url in enumerate(urls):
                tmp = urlparse(url)

                # in the Strip DEM index, around 700 urls point directly to tif files and fail to download,
                # e.g. /mnt/pgc/data/elev/dem/setsm/ArcticDEM/geocell/v3.0/2m_temp/n59w137/SETSM_WV03_20150518_104001000B703200_104001000C715B00_seg8_2m_v3.0_dem.tif
                if url.startswith('/mnt') and url.endswith('.tif'):
                    basic.outputlogMessage("error: not a valid url: %s" % url)
                    continue

                filename = os.path.basename(tmp.path)
                save_dem_path = os.path.join(save_folder, filename)
                if reg_tif_dir is not None:
                    tar_base = os.path.basename(filename)[:-7]
                    # file_pattern = ['*dem_reg.tif', '*reg_dem.tif'] # Arctic strip and tile (mosaic) version
                    if b_arcticDEM_tile:
                        reg_tifs = io_function.get_file_list_by_pattern(
                            reg_tif_dir, tar_base + '*reg_dem.tif')
                    else:
                        reg_tifs = io_function.get_file_list_by_pattern(
                            reg_tif_dir, tar_base + '*dem_reg.tif')
                    if len(reg_tifs) > 0:
                        basic.outputlogMessage(
                            'warning, the unpacked and registered tif for %s already exists, skip downloading'
                            % filename)
                        reg_tifs_list.append(reg_tifs[0])
                        continue

                    if './' + tar_base in no_registration_strips:
                        basic.outputlogMessage(
                            'warning, %s is in no_registration_strips list, skip downloading'
                            % filename)
                        continue

                if filename in downloading_tarballs:
                    basic.outputlogMessage(
                        'warning, %s is being downloaded by other processes' %
                        filename)
                    continue

                if os.path.isfile(
                        save_dem_path) and os.path.getsize(save_dem_path) > 1:
                    basic.outputlogMessage(
                        'warning, %s already exists, skip downloading' %
                        filename)
                else:
                    # download the dem
                    basic.outputlogMessage(
                        'start downloading the %d th DEM (%d in total)' %
                        ((ii + 1), len(urls)))
                    downloading_tarballs.append(filename)

                    # os.chdir(save_folder)

                    # run_a_process_download(url)  # download

                    ##################################################
                    # download in parallel
                    # if any earlier job failed, then quit
                    basic.check_exitcode_of_process(download_tasks)

                    while True:
                        job_count = basic.alive_process_count(download_tasks)
                        if job_count >= max_task_count:
                            print(
                                machine_name, datetime.now(),
                                'You are running %d or more tasks in parallel, wait '
                                % max_task_count)
                            time.sleep(60)
                            continue
                        break

                    # start the processing
                    sub_process = Process(
                        target=run_a_process_download,
                        args=(url, save_dem_path, reg_tif_dir, max_task_count,
                              b_unpack_after_downloading
                              ))  # start a process, don't wait
                    sub_process.start()
                    download_tasks.append(sub_process)

                    basic.close_remove_completed_process(download_tasks)

                    # os.chdir(curr_dir)

                dem_tar_ball_list.append(save_dem_path)

        else:
            basic.outputlogMessage(
                'Warning, can not find DEMs within %d th extent' % (idx))
            if b_save_grid_id_noDEM:
                save_id_grid_no_dem(idx)

    # wait until all tasks complete
    while True:
        job_count = basic.alive_process_count(download_tasks)
        if job_count > 0:
            print(
                machine_name, datetime.now(),
                'wait until all tasks are completed, alive task count: %d' %
                job_count)
            time.sleep(60)
        else:
            break

    return dem_tar_ball_list, reg_tifs_list
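The bounded-parallelism pattern here leans on a few helpers from basic. Hedged sketches of what they plausibly do with multiprocessing.Process objects (hypothetical reimplementations):

# Hypothetical sketches of the process-throttling helpers used above.
def alive_process_count(tasks):
    # number of worker processes still running
    return sum(1 for p in tasks if p.is_alive())

def close_remove_completed_process(tasks):
    # join and drop finished workers so the task list stays small
    for p in [p for p in tasks if not p.is_alive()]:
        p.join()
        tasks.remove(p)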
Code Example #12
def get_file_size_dem_tarball(dem_index_shp,
                              extent_polys,
                              pre_name,
                              xlsx_size_path,
                              poly_ids=None):
    # read dem polygons and url
    dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list(
        dem_index_shp, 'fileurl', b_fix_invalid_polygon=False)

    if os.path.isfile(xlsx_size_path):
        size_table = pd.read_excel(xlsx_size_path)
        save_idx_list = size_table['index'].to_list()
        save_url_list = size_table['fileurl'].to_list()
        save_size_list = size_table['filesize'].to_list()
    else:
        save_idx_list = [item for item in range(len(dem_urls))]  # index list
        save_url_list = dem_urls
        save_size_list = [float('nan')] * len(save_idx_list)

    basic.outputlogMessage('%d dem polygons in %s' %
                           (len(dem_polygons), dem_index_shp))

    if poly_ids is None:
        poly_ids = [item for item in range(len(extent_polys))]

    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage(
            'get ArcticDEM filesize for the %d th extent (%d/%d)' %
            (idx, count, len(extent_polys)))

        save_filesize_txt = pre_name + '_dem_FileSize_poly_%d.txt' % idx
        if os.path.isfile(save_filesize_txt):
            basic.outputlogMessage('%s exists, skip' % save_filesize_txt)
            continue

        # get fileurl
        dem_poly_idx_list = vector_gpd.get_poly_index_within_extent(
            dem_polygons, ext_poly)
        basic.outputlogMessage('find %d DEM within %d th extent' %
                               (len(dem_poly_idx_list), (idx)))
        urls = [dem_urls[id] for id in dem_poly_idx_list]
        url_size_list = [save_size_list[id] for id in dem_poly_idx_list]

        if len(urls) > 0:
            total_count = len(urls)
            for ii, (url, fileS, url_idx) in enumerate(
                    zip(urls, url_size_list, dem_poly_idx_list)):
                # skip urls that start with /mnt and end with .tif
                if url.startswith('/mnt') and url.endswith('.tif'):
                    basic.outputlogMessage("error: not a valid url: %s" % url)
                    continue
                if math.isnan(fileS) is False:
                    continue
                url_size_GB = get_one_url_file_size(url, ii, total_count)
                url_size_list[ii] = url_size_GB
                save_size_list[url_idx] = url_size_GB

            url_size_list_noNone = [
                item for item in url_size_list if math.isnan(item) is False
            ]

            if len(url_size_list_noNone) != len(url_size_list):
                basic.outputlogMessage(
                    'There are %d NaN values in url_size_list' %
                    (len(url_size_list) - len(url_size_list_noNone)))

            total_size_GB = sum(url_size_list_noNone)

            basic.outputlogMessage(
                'the size of the files to be downloaded is %.4lf GB for the %d th extent'
                % (total_size_GB, (idx + 1)))
            with open(save_filesize_txt, 'w') as f_obj:
                f_obj.write('%d DEM files, total size is %.6lf GB \n' %
                            (len(urls), total_size_GB))
        else:
            basic.outputlogMessage(
                'Warning, can not find DEMs within %d th extent' % (idx))

    # save table
    save_dict = {
        'index': save_idx_list,
        'filesize': save_size_list,
        'fileurl': save_url_list
    }

    save_dict_pd = pd.DataFrame(save_dict)
    # set strings_to_urls to False: the number of URLs would exceed Excel's limit of 65,530 URLs per worksheet
    # https://github.com/cxn03651/write_xlsx/issues/42
    with pd.ExcelWriter(xlsx_size_path, options={'strings_to_urls':
                                                 False}) as writer:
        save_dict_pd.to_excel(writer, sheet_name='url_file_size')

    return None
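A usage note: the options= keyword accepted by pd.ExcelWriter above works on older pandas releases; newer ones route engine settings through engine_kwargs instead. A hedged equivalent for the xlsxwriter engine:

import pandas as pd

# On newer pandas, pass xlsxwriter options via engine_kwargs.
with pd.ExcelWriter(xlsx_size_path, engine='xlsxwriter',
                    engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
    save_dict_pd.to_excel(writer, sheet_name='url_file_size')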
Code Example #13
def get_overlap_grids_for_one_extent(all_ids, all_grid_polys, dem_poly,
                                     dem_name, idx, dem_poly_count):
    print(timeTools.get_now_time_str(), idx, dem_poly_count)
    index = vector_gpd.get_poly_index_within_extent(all_grid_polys, dem_poly)
    grid_ids = [all_ids[idx] for idx in index]
    return dem_name, grid_ids
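The idx / dem_poly_count arguments suggest this function is mapped over many DEM footprints in parallel. A hedged usage sketch with multiprocessing (all_ids, all_grid_polys, dem_polys, and dem_names are assumed to come from the surrounding module):

from multiprocessing import Pool

# Hypothetical parallel driver for get_overlap_grids_for_one_extent.
def map_dem_polys_to_grids(all_ids, all_grid_polys, dem_polys, dem_names, num_proc=4):
    args = [(all_ids, all_grid_polys, poly, name, idx, len(dem_polys))
            for idx, (poly, name) in enumerate(zip(dem_polys, dem_names))]
    with Pool(num_proc) as pool:
        # list of (dem_name, grid_ids) tuples
        return pool.starmap(get_overlap_grids_for_one_extent, args)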
Code Example #14
def main():
    dem_index_shp = os.path.expanduser(
        '~/Data/Arctic/ArcticDEM/BROWSE_SERVER/indexes/ArcticDEM_Tile_Index_Rel7/ArcticDEM_Tile_Index_Rel7.shp'
    )
    # extent_shp = os.path.expanduser('~/Data/PDO/PDO_statistics_swatchs/swatch_bounding_boxes.shp')
    extent_shp = os.path.expanduser(
        '~/Data/PDO/extent_each_swatch/merge_all_qa_exent.shp')

    # extent polygons and projection (proj4)
    extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(
        extent_shp)
    dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(
        dem_index_shp)

    if extent_shp_prj != dem_shp_prj:
        basic.outputlogMessage(
            '%s and %s do not have the same projection, will reproject %s' %
            (extent_shp, dem_index_shp, os.path.basename(extent_shp)))
        epsg = map_projection.get_raster_or_vector_srs_info_epsg(dem_index_shp)
        # print(epsg)
        # extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp,dem_shp_prj.strip())
        extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, epsg)
    else:
        extent_polys = vector_gpd.read_polygons_gpd(extent_shp)

    poly_ids = [idx for idx in range(len(extent_polys))]
    if 'boxes' in os.path.basename(extent_shp):
        nc_file_names = vector_gpd.read_attribute_values_list(
            extent_shp, 'nc_file')
    else:
        nc_file_names = vector_gpd.read_attribute_values_list(
            extent_shp, 'layer')

    # read dem polygons and tile number
    dem_polygons, dem_tiles = vector_gpd.read_polygons_attributes_list(
        dem_index_shp, 'tile', b_fix_invalid_polygon=False)

    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage('get data for the %d th extent (%d/%d)' %
                               (idx, count, len(extent_polys)))

        save_txt_path = nc_file_names[idx] + '-' + 'dem_tiles_poly_%d.txt' % idx
        if os.path.isfile(save_txt_path):
            tiles = io_function.read_list_from_txt(save_txt_path)
            basic.outputlogMessage('read %d dem tiles from %s' %
                                   (len(tiles), save_txt_path))
        else:
            # get the tile names
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(
                dem_polygons, ext_poly)
            basic.outputlogMessage('find %d DEM within %d th extent' %
                                   (len(dem_poly_ids), (idx)))
            tiles = [dem_tiles[id] for id in dem_poly_ids]

            # save to txt
            io_function.save_list_to_txt(save_txt_path, tiles)
            basic.outputlogMessage('save dem tiles to %s' % save_txt_path)

        # download and create a mosaic
        url_head = 'https://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/mosaic/v3.0/32m/'
        download_tarball_for_one_polygon(tarball_dir, dem_tif_dir, url_head,
                                         tiles)

        # create a mosaic
        create_a_mosaic(nc_file_names[idx], idx, dem_eachSwatch_dir, ext_poly,
                        tiles)

    bak_folder = 'small_tifs'
    io_function.mkdir(bak_folder)
    # remove small and duplicated ones
    for file_name in nc_file_names:
        crop_tifs = io_function.get_file_list_by_pattern(
            dem_eachSwatch_dir, file_name + '*crop.tif')
        if len(crop_tifs) == 1:
            pass
        elif len(crop_tifs) > 1:
            # keep the largest one and move the smaller ones
            tif_files_size = [
                io_function.get_file_size_bytes(item) for item in crop_tifs
            ]
            max_size = max(tif_files_size)
            max_index = tif_files_size.index(max_size)
            del crop_tifs[max_index]
            for tmp in crop_tifs:
                io_function.movefiletodir(tmp, bak_folder)
                tmp = tmp.replace('_crop', '')
                io_function.movefiletodir(tmp, bak_folder)

        else:  # no tif
            raise ValueError('Result for %s does not exist' % file_name)