def mask_by_surface_water(map_raster, surface_water_crop):

    # save mask result to current folder
    save_mask_result = io_function.get_name_by_adding_tail(
        os.path.basename(map_raster), 'WaterMask')
    if os.path.isfile(save_mask_result):
        print('warning, %s exists' % save_mask_result)
        return save_mask_result

    # read
    map_array_2d, nodata = raster_io.read_raster_one_band_np(map_raster)
    water_array_2d, _ = raster_io.read_raster_one_band_np(surface_water_crop)

    print(map_array_2d.shape)
    if map_array_2d.shape != water_array_2d.shape:
        raise ValueError('size inconsistent: %s and %s' %
                         (str(map_array_2d.shape), str(water_array_2d.shape)))

    # mask out pixels that the water layer marks as water (1) or as other/invalid (255)
    map_array_2d[np.logical_or(water_array_2d == 1, water_array_2d == 255)] = 0

    if raster_io.save_numpy_array_to_rasterfile(map_array_2d,
                                                save_mask_result,
                                                map_raster,
                                                compress='lzw',
                                                tiled='Yes',
                                                bigtiff='if_safer'):
        return save_mask_result
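
# A minimal usage sketch (paths are hypothetical); assumes the surface-water
# raster was cropped beforehand to exactly match the map raster's grid.
def demo_mask_by_surface_water():
    masked_tif = mask_by_surface_water('grid123_map.tif',
                                       'grid123_surface_water_crop.tif')
    print('masked result saved to:', masked_tif)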
def mask_by_elevation(map_raster_path, elevation_crop_path, threshold):

    # save mask result to current folder
    save_mask_result = io_function.get_name_by_adding_tail(
        os.path.basename(map_raster_path), 'DEMMask')
    if os.path.isfile(save_mask_result):
        print('warning, %s exists' % save_mask_result)
        return save_mask_result

    # read
    map_array_2d, nodata = raster_io.read_raster_one_band_np(map_raster_path)
    dem_array_2d, _ = raster_io.read_raster_one_band_np(elevation_crop_path)

    print(map_array_2d.shape)
    if map_array_2d.shape != dem_array_2d.shape:
        raise ValueError('size inconsistent: %s and %s' %
                         (str(map_array_2d.shape), str(dem_array_2d.shape)))

    # mask out pixels with high elevation
    map_array_2d[dem_array_2d > threshold] = 0

    if raster_io.save_numpy_array_to_rasterfile(map_array_2d,
                                                save_mask_result,
                                                map_raster_path,
                                                compress='lzw',
                                                tiled='Yes',
                                                bigtiff='if_safer'):
        return save_mask_result
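
# A minimal usage sketch (paths and the 2000 m threshold are hypothetical):
# pixels whose elevation exceeds the threshold are set to 0 in the map raster.
def demo_mask_by_elevation():
    masked_tif = mask_by_elevation('grid123_map.tif', 'grid123_dem_crop.tif', 2000)
    print('masked result saved to:', masked_tif)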
Example #3
def slope_tif_to_slope_shapefile(slope_tif,slope_bin_path,slope_threshold):

    if os.path.isfile(slope_bin_path):
        print('%s exists' % slope_bin_path)
    else:
        slope_data, nodata = raster_io.read_raster_one_band_np(slope_tif)
        bin_slope = np.zeros_like(slope_data,dtype=np.uint8)
        bin_slope[slope_data > slope_threshold] = 1
        bin_slope[slope_data > 88] = 0          # very large slopes are likely caused by artifacts, so remove them

        # # Dilation or opening
        # # https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
        # kernel = np.ones((3, 3), np.uint8)  # if the kernel is 5 or larger, it will remove some narrow parts.
        # # bin_slope = cv2.dilate(bin_slope,kernel,iterations = 1)
        # bin_slope = cv2.morphologyEx(bin_slope, cv2.MORPH_OPEN, kernel)     # use opening to remove some noise
        # # bin_slope = cv2.morphologyEx(bin_slope, cv2.MORPH_CLOSE, kernel)    # closing small holes inside

        # save
        slope_bin = bin_slope*255
        raster_io.save_numpy_array_to_rasterfile(slope_bin,slope_bin_path,slope_tif,nodata=0,compress='lzw',tiled='yes',bigtiff='if_safer')   # set nodata as 0

    # to shapefile
    slope_bin_shp = vector_gpd.raster2shapefile(slope_bin_path,connect8=True)
    if slope_bin_shp is None:
        return False
    return slope_bin_shp
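
# A minimal usage sketch with hypothetical paths: binarize a slope raster at
# 20 degrees, then polygonize the binary raster to a shapefile.
def demo_slope_tif_to_slope_shapefile():
    shp = slope_tif_to_slope_shapefile('grid123_slope.tif', 'grid123_slope_bin.tif', 20)
    if shp is not False:
        print('slope polygons saved to:', shp)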
def get_new_color_table_for_raster(raster, color_table_txt, out_dir=None):
    data, no_data = raster_io.read_raster_one_band_np(raster)

    # remove nodata
    data_1d = data.flatten()
    print(data_1d.shape)
    data_1d = data_1d[data_1d != no_data]
    print(data_1d.shape)
    unique_values = np.unique(data_1d)
    print('unique_values:', unique_values)

    save_color_table = io_function.get_name_no_ext(
        os.path.basename(raster)) + '_color.txt'
    if out_dir is not None:
        save_color_table = os.path.join(out_dir, save_color_table)

    save_lines = []
    with open(color_table_txt, 'r') as f_obj:
        all_lines = f_obj.readlines()

        # copy first two lines
        save_lines.append(all_lines[0])
        save_lines.append(all_lines[1])

        for idx in range(2, len(all_lines)):
            value = int(all_lines[idx].split(',')[0])
            if value in unique_values:
                save_lines.append(all_lines[idx])

    with open(save_color_table, 'w') as f_obj:
        f_obj.writelines(save_lines)

    print('Save color table to %s' % os.path.abspath(save_color_table))
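
# A minimal usage sketch with hypothetical paths: keep only the color-table
# entries whose values actually occur in the raster. The color table is assumed
# to be comma-separated with two header lines, as the parser above expects.
def demo_get_new_color_table_for_raster():
    get_new_color_table_for_raster('landcover.tif', 'full_color_table.txt', out_dir='.')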
Example #5
def sum_matchtag(input_tifs, save_path):

    if len(input_tifs) < 1:
        return False
    # check band count, width, height
    height, width, count, dtype = raster_io.get_height_width_bandnum_dtype(
        input_tifs[0])
    for idx in range(1, len(input_tifs)):
        h, w, c, dt = raster_io.get_height_width_bandnum_dtype(
            input_tifs[idx])
        if h != height or w != width or c != count or dt != dtype:
            raise ValueError(
                'size or data type is different between %s and %s' %
                (input_tifs[0], input_tifs[idx]))

    if count != 1:
        raise ValueError('Matchtag should only have one band')

    sum_data = np.zeros((height, width), dtype=np.uint8)
    for tif in input_tifs:
        data, nodata = raster_io.read_raster_one_band_np(tif)
        # print(data.shape)
        sum_data += data

    # save to file
    raster_io.save_numpy_array_to_rasterfile(sum_data,
                                             save_path,
                                             input_tifs[0],
                                             compress='lzw',
                                             tiled='yes',
                                             bigtiff='if_safer')
    return True
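
# A minimal usage sketch with hypothetical paths. The sum is accumulated as
# uint8, so this assumes few enough matchtags (values 0/1) that it cannot overflow.
def demo_sum_matchtag():
    tifs = ['matchtag_2015.tif', 'matchtag_2016.tif', 'matchtag_2017.tif']
    if sum_matchtag(tifs, 'matchtag_sum.tif'):
        print('saved the matchtag sum to matchtag_sum.tif')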
Example #6
def segment_changes_on_dem_diff(dem_diff_tif, save_dir):

    out_pre = os.path.splitext(os.path.basename(dem_diff_tif))[0]

    # read images
    one_band_img, nodata = raster_io.read_raster_one_band_np(dem_diff_tif)

    # segmentation algorithms (their output is not always good; the parameters need to be chosen carefully)
    # out_labels = watershed_segmentation(one_band_img)
    # out_labels = k_mean_cluster_segmentation(one_band_img)
    # out_labels = quickshift_segmentaion(one_band_img)
    out_labels = mean_shift_segmentation(one_band_img)

    # segmentation by threshold (may produce too much noise)
    # mean = np.nanmean(one_band_img)
    # print("mean value is: %.4f"%mean)
    # one_band_img = one_band_img - mean
    # out_labels = np.zeros_like(one_band_img,dtype=np.uint8)
    # out_labels[ np.abs(one_band_img) > 2 ] = 1

    # save the label
    label_path = os.path.join(save_dir, out_pre + '_label.tif')
    raster_io.save_numpy_array_to_rasterfile(out_labels,
                                             label_path,
                                             dem_diff_tif,
                                             nodata=0)

    # convert the label to shapefile
    out_shp = os.path.join(save_dir, out_pre + '.shp')
    command_string = 'gdal_polygonize.py -8 %s -b 1 -f "ESRI Shapefile" %s' % (
        label_path, out_shp)
    res = os.system(command_string)
    if res != 0:
        sys.exit(1)
Example #7
def segment_subsidence_on_dem_diff(dem_diff_tif, save_dir):

    out_pre = os.path.splitext(os.path.basename(dem_diff_tif))[0]

    # read images
    one_band_img, nodata = raster_io.read_raster_one_band_np(dem_diff_tif)

    # segmentation by threshold (may produce too much noise)
    # mean = np.nanmean(one_band_img)
    # print("mean value is: %.4f"%mean)
    # one_band_img = one_band_img - mean    # cannot use the mean, which may be affected by outliers
    out_labels = np.zeros_like(one_band_img,dtype=np.uint8)
    out_labels[ one_band_img < -2 ] = 1     # a threshold of -1 results in a lot of noisy polygons, so use -2

    # apply median filter
    out_labels = cv2.medianBlur(out_labels, 3)  # kernel size = 3

    # save the label
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)
    label_path = os.path.join(save_dir, out_pre + '_label.tif')
    raster_io.save_numpy_array_to_rasterfile(out_labels, label_path, dem_diff_tif, nodata=0)

    # convert the label to shapefile
    out_shp = os.path.join(save_dir, out_pre + '.shp')
    command_string = 'gdal_polygonize.py -8 %s -b 1 -f "ESRI Shapefile" %s' % (label_path, out_shp)
    res = os.system(command_string)
    if res != 0:
        sys.exit(1)

    # post-processing
    post_processing_subsidence(out_shp)
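
# A minimal usage sketch with hypothetical paths: label pixels where the DEM
# difference is below -2 m, polygonize them, then run the post-processing step.
def demo_segment_subsidence_on_dem_diff():
    segment_subsidence_on_dem_diff('WR_dem_diff.tif', 'subsidence_results')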
Example #8
def read_date_dem_to_memory(pair_idx,
                            pair,
                            date_pair_list_sorted,
                            dem_data_dict,
                            dem_groups_date,
                            less_memory=False,
                            boundary=None):

    if less_memory is False:
        # read data into memory if needed, then cache it to avoid reading it again.
        # for a large area, reading all the rasters into memory may cause an out-of-memory problem
        if pair[0] not in dem_data_dict.keys():
            data_old, nodata_old = raster_io.read_raster_one_band_np(
                dem_groups_date[pair[0]][0], boundary=boundary)
            data_old[data_old == nodata_old] = np.nan
            dem_data_dict[pair[0]] = data_old
        else:
            data_old = dem_data_dict[pair[0]]

        # read data into memory if needed
        if pair[1] not in dem_data_dict.keys():
            data_new, nodata_new = raster_io.read_raster_one_band_np(
                dem_groups_date[pair[1]][0], boundary=boundary)
            data_new[data_new == nodata_new] = np.nan
            dem_data_dict[pair[1]] = data_new
        else:
            data_new = dem_data_dict[pair[1]]
    else:
        # if we don't have enough memory, don't cache all the DEM data; only read the two that are needed.
        # this will increase read operations from disk
        data_old, nodata_old = raster_io.read_raster_one_band_np(
            dem_groups_date[pair[0]][0], boundary=boundary)
        data_new, nodata_new = raster_io.read_raster_one_band_np(
            dem_groups_date[pair[1]][0], boundary=boundary)

        # replace nodata with nan
        data_old[data_old == nodata_old] = np.nan
        data_new[data_new == nodata_new] = np.nan

    # release some memory if we can (NO)

    return data_old, data_new
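
# A minimal usage sketch with hypothetical inputs: dem_groups_date maps an
# acquisition date to a list of DEM paths, a pair is an (old_date, new_date)
# tuple, and dem_data_dict caches arrays between calls when less_memory is False.
def demo_read_date_dem_to_memory():
    dem_groups_date = {'20150601': ['dem_20150601.tif'],
                       '20170815': ['dem_20170815.tif']}
    date_pairs = [('20150601', '20170815')]
    dem_data_dict = {}
    data_old, data_new = read_date_dem_to_memory(0, date_pairs[0], date_pairs,
                                                 dem_data_dict, dem_groups_date)
    print('difference of means:', np.nanmean(data_new) - np.nanmean(data_old))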
Example #9
def mask_dem_by_matchtag(input_dem, mask_tif, save_path):
    # check band count, width, height
    height, width, count, dtype = raster_io.get_height_width_bandnum_dtype(input_dem)
    height_mask, width_mask, count_mask, dtype_mask = raster_io.get_height_width_bandnum_dtype(mask_tif)

    if height_mask!=height or width_mask!=width or count_mask!=count:
        raise ValueError('size different between %s and %s'%(input_dem, mask_tif))

    if count != 1:
        raise ValueError('DEM and Matchtag should only have one band')

    dem_data, nodata = raster_io.read_raster_one_band_np(input_dem)
    matchdata, mask_nodata = raster_io.read_raster_one_band_np(mask_tif)

    # mask as nodata
    dem_data[ matchdata == 0 ] = nodata
    # save to file
    raster_io.save_numpy_array_to_rasterfile(dem_data,save_path,input_dem,compress='lzw',tiled='yes',bigtiff='if_safer')

    return save_path
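
# A minimal usage sketch with hypothetical paths: set DEM pixels to nodata
# wherever the matchtag is 0 (no stereo match).
def demo_mask_dem_by_matchtag():
    out = mask_dem_by_matchtag('strip_dem.tif', 'strip_matchtag.tif',
                               'strip_dem_masked.tif')
    print('masked DEM saved to:', out)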
Example #10
def test_to_unique_label_for_superpixels():
    label_img = os.path.expanduser(
        '~/Data/Arctic/canada_arctic/DEM/WR_dem_diff/segment_parallel_sub/WR_dem_diff_DEM_diff_prj_8bit_sub_label.tif'
    )
    out_labels, nodata = raster_io.read_raster_one_band_np(label_img)
    print('nodata', nodata)
    print('min and max labels of out_labels', np.min(out_labels),
          np.max(out_labels))
    new_labels = image_segment.to_unique_label_for_superpixels(out_labels)

    save_new_label = io_function.get_name_by_adding_tail(label_img, 'new')
    raster_io.save_numpy_array_to_rasterfile(new_labels, save_new_label,
                                             label_img)
Example #11
def dem_to_relative_8bit_a_patch(idx, patch, patch_count, dem_path,
                                 dst_nodata):
    # print('tile: %d / %d' % (idx + 1, patch_count))
    # patch_w = patch[2]
    # patch_h = patch[3]

    dem, nodata = raster_io.read_raster_one_band_np(dem_path, boundary=patch)
    # print(dem.shape)
    # print(dem.ndim)
    # dem_re = np.expand_dims(dem,axis=0)
    # print(dem_re.shape)

    patch_relative_dem_8bit = raster_io.image_numpy_allBands_to_8bit_hist(
        dem,
        per_min=0.02,
        per_max=0.98,
        src_nodata=nodata,
        dst_nodata=dst_nodata)

    return patch, patch_relative_dem_8bit
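
# A minimal usage sketch with a hypothetical patch: boundary tuples follow the
# (xoff, yoff, xsize, ysize) convention used elsewhere in this module, and 255
# is used as the destination nodata value.
def demo_dem_to_relative_8bit_a_patch():
    patch = (0, 0, 1024, 1024)
    patch_out, relative_8bit = dem_to_relative_8bit_a_patch(0, patch, 1, 'dem.tif', 255)
    print(relative_8bit.shape)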
Example #12
def zonal_stats_one_polygon(idx, polygon, image_tiles, img_tile_polygons, stats, nodata=None,range=None,
                            band = 1,all_touched=True):

    overlap_index = vector_gpd.get_poly_index_within_extent(img_tile_polygons, polygon)
    image_list = [image_tiles[item] for item in overlap_index]

    if len(image_list) == 1:
        out_image, out_tran,nodata = raster_io.read_raster_in_polygons_mask(image_list[0], polygon, nodata=nodata,
                                                                     all_touched=all_touched,bands=band)
    elif len(image_list) > 1:
        # for the case where the polygon overlaps more than one raster, produce a mosaic
        tmp_saved_files = []
        for k_img, image_path in enumerate(image_list):

            # print(image_path)
            tmp_save_path = os.path.splitext(os.path.basename(image_path))[0] + '_subset_poly%d'%idx +'.tif'
            _, _,nodata = raster_io.read_raster_in_polygons_mask(image_path, polygon,all_touched=all_touched,nodata=nodata,
                                                          bands=band, save_path=tmp_save_path)
            tmp_saved_files.append(tmp_save_path)

        # mosaic files in tmp_saved_files
        save_path = 'raster_for_poly%d.tif'%idx
        mosaic_args_list = ['gdal_merge.py', '-o', save_path,'-n',str(nodata),'-a_nodata',str(nodata)]
        mosaic_args_list.extend(tmp_saved_files)
        if basic.exec_command_args_list_one_file(mosaic_args_list,save_path) is False:
            raise IOError('error, obtaining a mosaic (%s) failed' % save_path)

        # read the raster
        out_image, out_nodata = raster_io.read_raster_one_band_np(save_path,band=band)
        # remove temporary rasters
        tmp_saved_files.append(save_path)
        for item in tmp_saved_files:
            io_function.delete_file_or_dir(item)

    else:
        basic.outputlogMessage('warning, cannot find a raster for polygon %d (0-based)' % idx)
        return None

    # do calculation
    return array_stats(out_image, stats, nodata,range=range)
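
# A minimal usage sketch with hypothetical inputs: image_tiles is a list of
# raster paths, img_tile_polygons their extent polygons (same order), and the
# stats argument is assumed to be a list of statistic names understood by array_stats.
def demo_zonal_stats_one_polygon(polygon, image_tiles, img_tile_polygons):
    return zonal_stats_one_polygon(0, polygon, image_tiles, img_tile_polygons,
                                   ['mean', 'std', 'count'], band=1)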
Example #13
def test_projection_epsg_2163():
    # read a patch from an image with EPSG:2163, then save it to see what the projection is
    # path on my Mac
    # folder = os.path.expanduser('~/Data/flooding_area/Houston/Houston_SAR_GRD_FLOAT_gee/S1_Houston_prj_8bit')

    # path on tesia
    folder = os.path.expanduser(
        '~/Bhaltos2/lingcaoHuang/flooding_area/Houston/Houston_SAR_GRD_FLOAT_gee/S1_Houston_prj_8bit_select'
    )
    img_path = os.path.join(
        folder,
        'S1A_IW_GRDH_1SDV_20170829T002620_20170829T002645_018131_01E74D_D734_prj_8bit.tif'
    )

    xoff, yoff, xsize, ysize = 10000, 10000, 500, 500
    boundary = (xoff, yoff, xsize, ysize)
    img_data, nodata = raster_io.read_raster_one_band_np(img_path,
                                                         boundary=boundary)

    raster_io.save_numpy_array_to_rasterfile(img_data,
                                             'test_projection.tif',
                                             img_path,
                                             boundary=boundary)
Example #14
def test_if_raseter_closed():

    # test whether the raster file is closed after reading when it is opened outside a 'with' statement

    data_dir = os.path.expanduser('~/Data/Arctic/canada_arctic/DEM/WR_dem_diff')
    tifs = io_function.get_file_list_by_ext('.tif', data_dir, bsub_folder=False)
    print("%d tif in %s" % (len(tifs), data_dir))

    data_list = []
    for idx in range(10):  # open each file 10 times

        boundary = (0, 0, 100, 100)  # (xoff,yoff ,xsize, ysize)
        for tif in tifs:
            data = raster_io.read_raster_one_band_np(tif,
                                                     band=1,
                                                     boundary=boundary)
            data_list.append(data)

        # check currently open files
        # open_file_list = basic.get_curr_process_openfiles()
        open_file_list = basic.get_all_processes_openfiles('python')
        print(' open file count:', len(open_file_list))
        for o_file in open_file_list:
            print(o_file)
Example #15
def mask_strip_dem_outlier_by_ArcticDEM_mosaic(crop_strip_dem_list, extent_poly, extent_id, crop_tif_dir, o_res, process_num):

    # get list of the ArcticDEM mosaic
    arcticDEM_mosaic_reg_tifs = io_function.get_file_list_by_ext('.tif',arcticDEM_tile_reg_tif_dir,bsub_folder=False)
    mosaic_dem_ext_polys = get_dem_tif_ext_polygons(arcticDEM_mosaic_reg_tifs)

    overlap_index = vector_gpd.get_poly_index_within_extent(mosaic_dem_ext_polys,extent_poly)

    #### crop and mosaic mosaic_reg_tifs
    sub_mosaic_dem_tifs = [arcticDEM_mosaic_reg_tifs[item] for item in overlap_index]
    mosaic_crop_tif_list = []
    for tif in sub_mosaic_dem_tifs:
        save_crop_path = os.path.join(crop_tif_dir, os.path.basename(io_function.get_name_by_adding_tail(tif, 'sub_poly_%d' % extent_id)) )
        if os.path.isfile(save_crop_path):
            basic.outputlogMessage('%s exists, skip cropping' % save_crop_path)
            mosaic_crop_tif_list.append(save_crop_path)
        else:
            crop_tif = subset_image_by_polygon_box(tif, save_crop_path, extent_poly, resample_m='near',
                            o_format='VRT', out_res=o_res,same_extent=True,thread_num=process_num)
            if crop_tif is False:
                raise ValueError('warning, crop %s failed' % tif)
            mosaic_crop_tif_list.append(crop_tif)
    if len(mosaic_crop_tif_list) < 1:
        basic.outputlogMessage('No mosaic version of ArcticDEM for grid %d, skip mask_strip_dem_outlier_by_ArcticDEM_mosaic' % extent_id)
        return False

    # create a mosaic; this can also handle a single input file, but it is slow
    save_dem_mosaic = os.path.join(crop_tif_dir, 'ArcticDEM_tiles_grid%d.tif'%extent_id)
    result = RSImageProcess.mosaic_crop_images_gdalwarp(mosaic_crop_tif_list, save_dem_mosaic, resampling_method='average',o_format='GTiff',
                                               compress='lzw', tiled='yes', bigtiff='if_safer',thread_num=process_num)
    if result is False:
        return False

    height_tileDEM, width_tileDEM, count_tileDEM, dtype_tileDEM = raster_io.get_height_width_bandnum_dtype(save_dem_mosaic)
    tileDEM_data, tileDEM_nodata = raster_io.read_raster_one_band_np(save_dem_mosaic)
    # masking the strip version of DEMs
    mask_strip_dem_list = []
    for idx, strip_dem in enumerate(crop_strip_dem_list):
        save_path = io_function.get_name_by_adding_tail(strip_dem, 'maskOutlier')
        if os.path.isfile(save_path):
            basic.outputlogMessage('%s exists, skip' % save_path)
            mask_strip_dem_list.append(save_path)
            continue

        # check band count, width, height
        height, width, count, dtype = raster_io.get_height_width_bandnum_dtype(strip_dem)
        if height_tileDEM != height or width_tileDEM != width or count_tileDEM != count:
            raise ValueError('size different between %s and %s' % (strip_dem, save_dem_mosaic))
        if count != 1:
            raise ValueError('DEM and Matchtag should only have one band')

        try:
            dem_data, nodata = raster_io.read_raster_one_band_np(strip_dem)
        except Exception:
            basic.outputlogMessage(' invalid tif file: %s'%strip_dem)
            continue

        nodata_loc = np.where(dem_data == nodata)

        diff = dem_data - tileDEM_data
        # mask as nodata
        dem_data[np.abs(diff) > 50 ] = nodata   # mask out differences greater than 50 m
        dem_data[ nodata_loc ] = nodata         # the step above may alter some nodata pixels; restore them
        # save to file
        raster_io.save_numpy_array_to_rasterfile(dem_data, save_path, strip_dem, compress='lzw', tiled='yes',
                                                 bigtiff='if_safer')
        mask_strip_dem_list.append(save_path)

    return mask_strip_dem_list
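
# A minimal usage sketch with hypothetical inputs: extent_poly is the polygon of
# grid extent_id, and the strip DEMs are assumed to be already cropped to that
# extent at resolution o_res (here 2 m), using 4 processes.
def demo_mask_strip_dem_outlier(extent_poly):
    strip_list = ['strip_dem1_sub_poly_123.tif', 'strip_dem2_sub_poly_123.tif']
    masked = mask_strip_dem_outlier_by_ArcticDEM_mosaic(strip_list, extent_poly,
                                                        123, 'crop_tifs', 2.0, 4)
    if masked is not False:
        print('%d strip DEMs masked' % len(masked))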
Example #16
def segment_a_patch(idx, patch, patch_count, img_path, org_raster,
                    b_save_patch_label):

    print('tile: %d / %d' % (idx + 1, patch_count))
    image_name_no_ext = io_function.get_name_no_ext(img_path)
    patch_dir = image_name_no_ext + '_patch%d' % idx
    patch_label_path = os.path.join(
        patch_dir, image_name_no_ext + '_patch%d_label.tif' % idx)
    if b_save_patch_label is True:
        if os.path.isdir(patch_dir) is False:
            io_function.mkdir(patch_dir)
        if os.path.isfile(patch_label_path):
            print('%s exists, skip' % patch_label_path)
            return patch, patch_label_path, None, None

    # read image
    one_band_img, nodata = raster_io.read_raster_one_band_np(img_path,
                                                             boundary=patch)

    # apply median filter to the image (remove some noise)
    one_band_img = cv2.medianBlur(one_band_img,
                                  3)  # kernel size = 3; cannot accept int32

    # segmentation algorithms (their output is not always good; the parameters need to be chosen carefully)
    # out_labels = watershed_segmentation(one_band_img)
    # out_labels = k_mean_cluster_segmentation(one_band_img)
    out_labels = quickshift_segmentaion(one_band_img,
                                        ratio=0.3,
                                        kernel_size=5,
                                        max_dist=10,
                                        sigma=1,
                                        convert2lab=False)
    # out_labels = mean_shift_segmentation(one_band_img)

    # print('min and max labels of out_labels', np.min(out_labels), np.max(out_labels))

    if b_save_patch_label is True:
        # save the label
        raster_io.save_numpy_array_to_rasterfile(
            out_labels, patch_label_path, img_path,
            boundary=patch)  # it copies the nodata value; need to unset it later
        return patch, patch_label_path, None, None

    # calculate the attributes based on the original data
    object_attributes = {}  # object id (label) and attributes (list)
    if org_raster is not None:
        org_img_b1, org_nodata = raster_io.read_raster_one_band_np(
            org_raster, boundary=patch)

        # get regions (the labels output by segmentation are not unique across superpixels)
        # regions = measure.regionprops(out_labels, intensity_image=org_img_b1)     # regions is based on out_labels, so it has the same issue.
        # print('region count from sk-image measure:',len(regions))

        label_list = np.unique(out_labels)
        # get statistics for each segmented object (label)
        for label in label_list:
            in_array = org_img_b1[out_labels == label]
            object_attributes[label] = get_stastics_from_array(
                in_array, org_nodata)

        return patch, out_labels, nodata, object_attributes

    return patch, out_labels, nodata, None
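
# A minimal usage sketch with hypothetical inputs: the patch follows the
# (xoff, yoff, xsize, ysize) convention, and org_raster provides the original
# values for the per-segment statistics.
def demo_segment_a_patch():
    patch = (0, 0, 1024, 1024)
    patch_out, labels, nodata, attributes = segment_a_patch(
        0, patch, 1, 'dem_diff_8bit.tif', 'dem_diff.tif', False)
    print('number of segments:', len(attributes))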
def main(options, args):
    extent_shp = args[0]
    task_list = [args[item] for item in range(1, len(args))]
    # task_name = args[1]
    if len(task_list) < 1:
        raise ValueError('There is no task: %s' % str(task_list))

    # local_grid_id_txt is in the current dir
    # log_grid_ids_txt, log_grid_ids_txt_done is in grid_ids_txt_dir
    local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(
        extent_shp)
    # check whether it has been completed
    if os.path.isfile(log_grid_ids_txt_done):
        basic.outputlogMessage('Tasks for extent %s have been completed' %
                               extent_shp)
        return True

    r_working_dir = '/scratch/summit/lihu9680/Arctic/dem_processing' if options.remote_working_dir is None else options.remote_working_dir
    r_log_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/log_dir' if options.remote_log_dir is None else options.remote_log_dir
    process_node = '$curc_host' if options.process_node is None else options.process_node
    download_node = '$curc_host' if options.download_node is None else options.download_node

    max_grid_count = options.max_grids
    b_remove_tmp_folders = options.b_remove_tmp_folders
    b_dont_remove_DEM_files = options.b_dont_remove_DEM_files
    b_no_slurm = options.b_no_slurm
    b_divide_to_subsets = True

    # modify the folder name of subsets
    global subset_shp_dir
    subset_shp_dir = subset_shp_dir + '_' + io_function.get_name_no_ext(
        extent_shp)
    global msg_file_pre
    msg_file_pre = io_function.get_name_no_ext(extent_shp) + '_' + msg_file_pre

    grid_ids_to_process_txt = io_function.get_name_no_ext(
        extent_shp) + '_' + 'grid_ids_to_process.txt'

    # build the map of DEM-covered grid ids (takes time, but only needs to run once at the beginning)
    build_dict_of_dem_cover_grid_ids(dem_strip_shp, grid_20_shp,
                                     strip_dem_cover_grids_txt)
    build_dict_of_dem_cover_grid_ids(dem_tile_shp, grid_20_shp,
                                     tile_dem_cover_grids_txt)

    # get grids for processing
    # read grids and ids
    time0 = time.time()
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
        grid_20_shp, 'id')
    print('time cost of reading polygons and attributes', time.time() - time0)

    grid_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)

    # get grid ids based on input extent
    grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

    # based on extent shape, subset grid_20_id_raster
    # # using gdalwarp to crop the mask also has a 0.5 pixel offset, so don't use it
    # grid_20_id_raster_sub = io_function.get_name_by_adding_tail(os.path.basename(grid_20_id_raster),'sub')
    # if RSImageProcess.subset_image_by_shapefile(grid_20_id_raster,extent_shp,save_path=grid_20_id_raster_sub) is False:
    #     return False

    # read grid_ids_2d, then mask it
    grid_ids_2d, grid_nodata = raster_io.read_raster_one_band_np(
        grid_20_id_raster)  # 2D array of grid ids
    # rasterize grid_polys; this will serve as a mask
    grid_ids_2d_mask = raster_io.burn_polygons_to_a_raster(
        grid_20_id_raster, grid_polys, 1, None)
    # raster_io.save_numpy_array_to_rasterfile(grid_ids_2d_mask,'grid_ids_2d_mask.tif',grid_20_id_raster,nodata=255)  # save to disk for checking
    loc_masked_out = np.where(grid_ids_2d_mask != 1)
    # grid_ids_2d[ loc_masked_out ] = grid_nodata
    visit_np = np.zeros_like(grid_ids_2d, dtype=np.uint8)
    visit_np[loc_masked_out] = 1  # 1 indicates already visited
    visit_np[np.where(
        grid_ids_2d == grid_nodata)] = 1  # 1 indicates already visited

    subset_id = -1
    # on tesia, uist, vpn-connected laptop
    if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
        io_function.mkdir(subset_shp_dir)
        sync_log_files(process_node, r_log_dir, process_log_dir)
        update_complete_grid_list(grid_ids, task_list)

    while True:
        subset_id += 1
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:

            # remove grids that have been completed or ignored
            ignore_ids = get_complete_ignore_grid_ids()
            num_grid_ids = save_grid_ids_need_to_process(
                grid_ids,
                ignore_ids=ignore_ids,
                save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            # if the input is not a shapefile, then don't divide it into many subsets
            if extent_shp.endswith('.txt'):
                select_grid_polys, selected_grid_ids = grid_polys, grid_ids
                if len(selected_grid_ids) > 2000:
                    raise ValueError('There are too many grids to process at once')
                b_divide_to_subsets = False
                subset_id = 999999
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) +
                    '_sub%d' % subset_id + '.shp')
                save_selected_girds_and_ids(selected_grid_ids,
                                            select_grid_polys, grid_prj,
                                            select_grids_shp)

            else:
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) +
                    '_sub%d' % subset_id + '.shp')
                select_grid_polys, selected_grid_ids = get_grids_for_download_process(
                    grid_polys,
                    grid_ids,
                    ignore_ids,
                    max_grid_count,
                    grid_ids_2d,
                    visit_np,
                    select_grids_shp,
                    proj=grid_prj)
            if selected_grid_ids is None:
                break  # no more grids
            if len(selected_grid_ids) < 1:
                continue

            subset_info_txt = msg_file_pre + '%d.txt' % subset_id
            if os.path.isfile(subset_info_txt) is False:
                # init the file
                update_subset_info(subset_info_txt,
                                   key_list=[
                                       'id', 'createTime', 'shp', 'pre_status',
                                       'proc_status'
                                   ],
                                   info_list=[
                                       subset_id,
                                       str(datetime.now()), select_grids_shp,
                                       'notYet', 'notYet'
                                   ])

            # download and unpack ArcticDEM, do registration, send to curc
            if download_process_send_arctic_dem(subset_info_txt,
                                                r_working_dir,
                                                process_node,
                                                task_list,
                                                b_send_data=not b_no_slurm) is True:
                continue

            # copy file from remote machine
            if b_no_slurm is False:
                copy_results_from_remote_node()

                sync_log_files(process_node, r_log_dir, process_log_dir)

                # update complete id list
                update_complete_grid_list(grid_ids, task_list)

            # save this to disk to check progress; if there are not too many grids (<100),
            # we can use this one to process without dividing the grids into many subsets
            num_grid_ids = save_grid_ids_need_to_process(
                grid_ids, save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            if b_no_slurm:
                # process ArcticDEM using local computing resource
                if produce_dem_products(
                        task_list,
                        b_remove_job_folder=b_remove_tmp_folders,
                        no_slurm=b_no_slurm) is False:
                    break

            if b_divide_to_subsets is False:
                break

        elif 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:  # curc
            # process ArcticDEM using the computing resource on CURC
            if produce_dem_products(
                    task_list,
                    b_remove_job_folder=b_remove_tmp_folders) is False:
                break
        else:
            print('unknown machine : %s ' % machine_name)
            break

        # remove unneeded DEM files
        remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)

    # monitor results in remote computer
    check_time = 200
    while check_time > 0 and not b_no_slurm:
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
            print(datetime.now(), 'wait 10 min for results in computing nodes')
            time.sleep(600)
            # copy file from remote machine
            copy_results_from_remote_node()
            # sync the complete id list, DEM info, no-DEM grids, etc.
            sync_log_files(process_node, r_log_dir, process_log_dir)
            # update complete id list
            update_complete_grid_list(grid_ids, task_list)
            # remove unneeded DEM files
            remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)
            remote_sub_txt = get_subset_info_txt_list(
                'proc_status', ['notYet', 'working'],
                remote_node=process_node,
                remote_folder=r_working_dir)
            if len(remote_sub_txt) < 1 and check_time != 1:
                check_time = 1  # set to 1, so it will only check one more time
            else:
                check_time -= 1
        else:
            break