Example #1
def main(options, args):
    extent_shp_or_ids_txt = args[0]
    process_num = options.process_num
    o_res = options.out_res

    if os.path.isdir(grid_matchtag_sum_dir) is False:
        io_function.mkdir(grid_matchtag_sum_dir)

    basic.setlogfile('produce_matchtag_sum_ArcticDEM_log_%s.txt' %
                     timeTools.get_now_time_str())

    # read grids and ids
    time0 = time.time()
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
        grid_20_shp, 'id')
    print('time cost of reading polygons and attributes', time.time() - time0)

    # get grid ids based on input extent
    grid_base_name = os.path.splitext(
        os.path.basename(extent_shp_or_ids_txt))[0]
    grid_polys, grid_ids = get_grid_20(extent_shp_or_ids_txt, all_grid_polys,
                                       all_ids)

    # check matchtag sum existence
    grid_dem_tifs, grid_ids_no_sum = get_existing_matchtag_sum(
        grid_matchtag_sum_dir, grid_base_name, grid_ids)
    if len(grid_ids_no_sum) > 0:
        # refine grid_polys
        if len(grid_ids) > len(grid_ids_no_sum):
            id_index = [grid_ids.index(id) for id in grid_ids_no_sum]
            grid_polys = [grid_polys[idx] for idx in id_index]

    # # download ArcticDEM and apply registration
        # tarballs, reg_tifs = download_dem_tarball(dem_strip_shp, grid_polys, arcticDEM_tarball_dir, grid_base_name,
        #                                         reg_tif_dir=arcticDEM_reg_tif_dir, poly_ids=grid_ids_no_demDiff)
        #
    # # unpack and apply registration
        # if len(tarballs) > 0:
    #     basic.outputlogMessage('Process %d dem tarballs'%len(tarballs))
        #     out_reg_tifs = process_dem_tarball(tarballs,'./',arcticDEM_reg_tif_dir,remove_inter_data=True, apply_registration=True)
        #     basic.outputlogMessage('Get %d new registration dem tifs' % len(out_reg_tifs))
        #     reg_tifs.extend(out_reg_tifs)

        reg_tifs = io_function.get_file_list_by_ext('.tif',
                                                    arcticDEM_reg_tif_dir,
                                                    bsub_folder=False)
        matchtag_tifs = [tif for tif in reg_tifs
                         if 'matchtag' in tif]  # only keep matchtag
        # crop, sum
        out_dem_diffs = produce_matchtag_sum_grids(grid_polys,
                                                   grid_ids_no_sum,
                                                   grid_base_name,
                                                   matchtag_tifs,
                                                   o_res,
                                                   process_num=process_num)
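
A minimal sketch of how this main() is presumably invoked from the command line; the option names are inferred from the attribute accesses above (options.process_num, options.out_res), and the defaults are placeholders rather than the source script's actual values:

from optparse import OptionParser

if __name__ == '__main__':
    parser = OptionParser(usage='%prog extent_shp_or_ids_txt [options]')
    parser.add_option('--process_num', dest='process_num', type='int', default=4,
                      help='number of processes to run in parallel')
    parser.add_option('--out_res', dest='out_res', type='float', default=2.0,
                      help='output resolution (placeholder default)')
    options, args = parser.parse_args()
    main(options, args)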

Example #2
def main(options, args):
    extent_shp_or_ids_txt = args[0]
    process_num = options.process_num
    keep_dem_percent = options.keep_dem_percent
    o_res = options.out_res

    basic.setlogfile('produce_headwall_shp_ArcticDEM_log_%s.txt' %
                     timeTools.get_now_time_str())

    if os.path.isdir(grid_dem_headwall_shp_dir) is False:
        io_function.mkdir(grid_dem_headwall_shp_dir)

    # read grids and ids
    time0 = time.time()
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
        grid_20_shp, 'id')
    print('time cost of reading polygons and attributes', time.time() - time0)

    # get grid ids based on input extent
    grid_base_name = os.path.splitext(
        os.path.basename(extent_shp_or_ids_txt))[0]
    grid_polys, grid_ids = get_grid_20(extent_shp_or_ids_txt, all_grid_polys,
                                       all_ids)

    # check headwall shapefile existence
    grid_headwall_shps, grid_id_no_headwall_shp = get_existing_grid_headwall_shp(
        grid_dem_headwall_shp_dir, grid_base_name, grid_ids)
    if len(grid_id_no_headwall_shp) > 0:
        # refine grid_polys
        if len(grid_ids) > len(grid_id_no_headwall_shp):
            id_index = [grid_ids.index(id) for id in grid_id_no_headwall_shp]
            grid_polys = [grid_polys[idx] for idx in id_index]

        reg_tifs = io_function.get_file_list_by_ext('.tif',
                                                    arcticDEM_reg_tif_dir,
                                                    bsub_folder=False)
        reg_tifs = [tif for tif in reg_tifs
                    if 'matchtag' not in tif]  # remove matchtag
        #
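        # note: b_mosaic_id and b_mosaic_date are presumably module-level flags
        # (not defined in this snippet) controlling whether the strip DEMs are
        # mosaicked by strip id and by acquisition date before extraction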
        headwall_shp_folders = extract_headwall_grids(grid_polys,
                                                      grid_id_no_headwall_shp,
                                                      grid_base_name,
                                                      reg_tifs,
                                                      b_mosaic_id,
                                                      b_mosaic_date,
                                                      keep_dem_percent,
                                                      o_res,
                                                      process_num=process_num)

Example #3
def get_grid_ids_extent(extent_shp):
    if 'ArcticDEM_grid_20km' in os.path.basename(extent_shp):
        print(
            'input %s looks like a grid file, read grid polygons and ids from it directly'
            % extent_shp)
        grid_polys, grid_ids = vector_gpd.read_polygons_attributes_list(
            extent_shp, 'grid_id')
        file_name_base = os.path.splitext(os.path.basename(extent_shp))[0]
        shp_corresponding_grid_ids_txt = file_name_base + '_grid_ids.txt'
        io_function.save_list_to_txt(shp_corresponding_grid_ids_txt,
                                     [str(item) for item in grid_ids])
    else:
        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
            grid_20_shp, 'id')  # in this file, it's "id", not "grid_id"
        print('time cost of reading polygons and attributes', time.time() - time0)
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

    return grid_ids
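
get_grid_ids_extent() branches on the file name: a shapefile whose base name contains 'ArcticDEM_grid_20km' is treated as the grid file itself and its 'grid_id' attributes are read directly, while anything else is treated as an extent and queried against grid_20_shp. A short hypothetical usage (paths are illustrative only):

# grid ids via a spatial query of an extent against grid_20_shp
ids = get_grid_ids_extent('study_area_extent.shp')
# grid ids read directly from a (subset of the) grid file
ids = get_grid_ids_extent('ArcticDEM_grid_20km_subset.shp')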
Example #4
def produce_corresponding_grid_ids_txt(extent_shp, local_grid_id_txt,
                                       log_grid_ids_txt):

    # if it is in the log dir but not in the current dir, copy it
    if os.path.isfile(
            log_grid_ids_txt) and os.path.isfile(local_grid_id_txt) is False:
        io_function.copy_file_to_dst(log_grid_ids_txt,
                                     local_grid_id_txt,
                                     overwrite=False)
        return True

    # if not in the local dir, then generate it
    if os.path.isfile(local_grid_id_txt) is False:
        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
            grid_20_shp, 'id')
        print('time cost of reading polygons and attributes', time.time() - time0)

        # this will create local_grid_id_txt
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

        # modify local_grid_id_txt by excluding grid ids already covered by adjacent extents
        other_grid_ids = read_grid_ids_from_other_extent()
        grid_ids = [id for id in grid_ids if id not in other_grid_ids]

        # overwrite the local_grid_id_txt file
        grid_ids_str = [str(item) for item in grid_ids]
        io_function.copy_file_to_dst(local_grid_id_txt,
                                     io_function.get_name_by_adding_tail(
                                         local_grid_id_txt,
                                         'noRMadj'))  # save a copy
        io_function.save_list_to_txt(local_grid_id_txt, grid_ids_str)

        # copy to log dir
        io_function.copy_file_to_dst(local_grid_id_txt, log_grid_ids_txt)

    return True
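
Before overwriting local_grid_id_txt, the function above stashes a copy whose name gains a 'noRMadj' tail. A guess at what io_function.get_name_by_adding_tail does, based on its name and this usage (the real helper may differ):

import os

def get_name_by_adding_tail(path, tail):
    # assumed behavior: insert '_<tail>' before the file extension
    base, ext = os.path.splitext(path)
    return base + '_' + tail + ext

# e.g. 'alaska_extent_grid_ids.txt' -> 'alaska_extent_grid_ids_noRMadj.txt'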

Example #5
def main(options, args):
    extent_shp = args[0]
    task_list = args[1:]
    # task_name = args[1]
    if len(task_list) < 1:
        raise ValueError('There is no task: %s' % str(task_list))

    # local_grid_id_txt is in the current dir
    # log_grid_ids_txt, log_grid_ids_txt_done is in grid_ids_txt_dir
    local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(
        extent_shp)
    # check whether the tasks have already been completed
    if os.path.isfile(log_grid_ids_txt_done):
        basic.outputlogMessage('Tasks for extent %s have been completed' %
                               extent_shp)
        return True

    r_working_dir = '/scratch/summit/lihu9680/Arctic/dem_processing' if options.remote_working_dir is None else options.remote_working_dir
    r_log_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/log_dir' if options.remote_log_dir is None else options.remote_log_dir
    process_node = '$curc_host' if options.process_node is None else options.process_node
    download_node = '$curc_host' if options.download_node is None else options.download_node

    max_grid_count = options.max_grids
    b_remove_tmp_folders = options.b_remove_tmp_folders
    b_dont_remove_DEM_files = options.b_dont_remove_DEM_files
    b_no_slurm = options.b_no_slurm
    b_divide_to_subsets = True

    # modify the folder name of subsets
    global subset_shp_dir
    subset_shp_dir = subset_shp_dir + '_' + io_function.get_name_no_ext(
        extent_shp)
    global msg_file_pre
    msg_file_pre = io_function.get_name_no_ext(extent_shp) + '_' + msg_file_pre

    grid_ids_to_process_txt = io_function.get_name_no_ext(
        extent_shp) + '_' + 'grid_ids_to_process.txt'

    # build the mapping of which grids each DEM covers (takes time, but only needs to run once at the beginning)
    build_dict_of_dem_cover_grid_ids(dem_strip_shp, grid_20_shp,
                                     strip_dem_cover_grids_txt)
    build_dict_of_dem_cover_grid_ids(dem_tile_shp, grid_20_shp,
                                     tile_dem_cover_grids_txt)

    # get grids for processing
    # read grids and ids
    time0 = time.time()
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
        grid_20_shp, 'id')
    print('time cost of reading polygons and attributes', time.time() - time0)

    gird_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)

    # get grid ids based on input extent
    grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

    # based on extent shape, subset grid_20_id_raster
    # # using gdalwarp to crop the mask also introduces a 0.5 pixel offset, so don't use it
    # grid_20_id_raster_sub = io_function.get_name_by_adding_tail(os.path.basename(grid_20_id_raster),'sub')
    # if RSImageProcess.subset_image_by_shapefile(grid_20_id_raster,extent_shp,save_path=grid_20_id_raster_sub) is False:
    #     return False

    # read grid_ids_2d, then mask it
    grid_ids_2d, grid_nodata = raster_io.read_raster_one_band_np(
        grid_20_id_raster)  # 2D array of grid ids
    # rasterize grid_polys; the result will serve as a mask
    grid_ids_2d_mask = raster_io.burn_polygons_to_a_raster(
        grid_20_id_raster, grid_polys, 1, None)
    # raster_io.save_numpy_array_to_rasterfile(grid_ids_2d_mask,'grid_ids_2d_mask.tif',grid_20_id_raster,nodata=255)  # save to disk for checking
    loc_masked_out = np.where(grid_ids_2d_mask != 1)
    # grid_ids_2d[ loc_masked_out ] = grid_nodata
    visit_np = np.zeros_like(grid_ids_2d, dtype=np.uint8)
    visit_np[loc_masked_out] = 1  # 1 indicates already visited
    visit_np[np.where(
        grid_ids_2d == grid_nodata)] = 1  # 1 indicates already visited

    subset_id = -1
    # on tesia, uist, vpn-connected laptop
    if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
        io_function.mkdir(subset_shp_dir)
        sync_log_files(process_node, r_log_dir, process_log_dir)
        update_complete_grid_list(grid_ids, task_list)

    while True:
        subset_id += 1
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:

            # remove grids that have been completed or ignored
            ignore_ids = get_complete_ignore_grid_ids()
            num_grid_ids = save_grid_ids_need_to_process(
                grid_ids,
                ignore_ids=ignore_ids,
                save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            # if the input is not a shapefile, don't divide it into many subsets
            if extent_shp.endswith('.txt'):
                select_grid_polys, selected_gird_ids = grid_polys, grid_ids
                if len(selected_gird_ids) > 2000:
                    raise ValueError('There are too many grids to process at once')
                b_divide_to_subsets = False
                subset_id = 999999
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) +
                    '_sub%d' % subset_id + '.shp')
                save_selected_girds_and_ids(selected_gird_ids,
                                            select_grid_polys, gird_prj,
                                            select_grids_shp)

            else:
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) +
                    '_sub%d' % subset_id + '.shp')
                select_grid_polys, selected_gird_ids = get_grids_for_download_process(
                    grid_polys,
                    grid_ids,
                    ignore_ids,
                    max_grid_count,
                    grid_ids_2d,
                    visit_np,
                    select_grids_shp,
                    proj=gird_prj)
            if selected_gird_ids is None:
                break  # no more grids
            if len(selected_gird_ids) < 1:
                continue

            subset_info_txt = msg_file_pre + '%d.txt' % subset_id
            if os.path.isfile(subset_info_txt) is False:
                # init the file
                update_subset_info(subset_info_txt,
                                   key_list=[
                                       'id', 'createTime', 'shp', 'pre_status',
                                       'proc_status'
                                   ],
                                   info_list=[
                                       subset_id,
                                       str(datetime.now()), select_grids_shp,
                                       'notYet', 'notYet'
                                   ])

            # download and unpack ArcticDEM, do registration, send to curc
            if download_process_send_arctic_dem(subset_info_txt,
                                                r_working_dir,
                                                process_node,
                                                task_list,
                                                b_send_data=not b_no_slurm) is True:
                continue

            # copy file from remote machine
            if b_no_slurm is False:
                copy_results_from_remote_node()

                sync_log_files(process_node, r_log_dir, process_log_dir)

                # update complete id list
                update_complete_grid_list(grid_ids, task_list)

            # save this to disk to check progress; if there are not too many grids (<100),
            # we can use this file to process without dividing the grids into many subsets
            num_grid_ids = save_grid_ids_need_to_process(
                grid_ids, save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            if b_no_slurm:
                # process ArcticDEM using local computing resource
                if produce_dem_products(
                        task_list,
                        b_remove_job_folder=b_remove_tmp_folders,
                        no_slurm=b_no_slurm) is False:
                    break

            if b_divide_to_subsets is False:
                break

        elif 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:  # curc
            # process ArcticDEM using the computing resource on CURC
            if produce_dem_products(
                    task_list,
                    b_remove_job_folder=b_remove_tmp_folders) is False:
                break
        else:
            print('unknown machine: %s' % machine_name)
            break

        # remove DEM files that are no longer needed
        remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)

    # monitor results on the remote computer
    check_time = 200
    while check_time > 0 and b_no_slurm == False:
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
            print(datetime.now(), 'wait 10 min for results in computing nodes')
            time.sleep(600)
            # copy file from remote machine
            copy_results_from_remote_node()
            # sync complete id list, dem info, no-dem grids, etc.
            sync_log_files(process_node, r_log_dir, process_log_dir)
            # update complete id list
            update_complete_grid_list(grid_ids, task_list)
            # remove DEM files that are no longer needed
            remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)
            remote_sub_txt = get_subset_info_txt_list(
                'proc_status', ['notYet', 'working'],
                remote_node=process_node,
                remote_folder=r_working_dir)
            if len(remote_sub_txt) < 1 and check_time != 1:
                check_time = 1  # set to 1 so it will only check one more time
            else:
                check_time -= 1
        else:
            break
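
The visit_np bookkeeping above is what lets get_grids_for_download_process walk only unprocessed cells: anything outside the rasterized extent, or equal to nodata, is pre-marked as visited. A self-contained sketch of that masking pattern with toy arrays (the real code reads both arrays from grid_20_id_raster):

import numpy as np

grid_nodata = -9999
grid_ids_2d = np.array([[1, 2, grid_nodata],
                        [3, 4, 5]])
# 1 where a cell falls inside the rasterized extent polygons, 0 elsewhere
grid_ids_2d_mask = np.array([[1, 1, 0],
                             [0, 1, 1]])

visit_np = np.zeros_like(grid_ids_2d, dtype=np.uint8)
visit_np[np.where(grid_ids_2d_mask != 1)] = 1       # outside the extent: mark as visited
visit_np[np.where(grid_ids_2d == grid_nodata)] = 1  # nodata: mark as visited

print(visit_np)
# [[0 0 1]
#  [1 0 0]]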
Example #6
def main(options, args):

    process_num = options.process_num
    buffer_size = options.buffer_size
    # perform the selection grid by grid
    basic.setlogfile('select_RTS_YOLO_demDiff_headwall_%s.txt' %
                     timeTools.get_now_time_str())

    b_grid = options.b_grid
    if b_grid:
        # process the selection grid by grid
        extent_shp_or_ids_txt = args[0]
        yolo_result_dir = os.path.expanduser(
            '~/Data/Arctic/alaska/autoMapping/alaskaNS_yolov4_1')
        dem_subsidence_dir = grid_dem_diffs_segment_dir
        grid_headwall_dir = grid_dem_headwall_shp_dir

        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
            grid_20_shp, 'id')
        print('time cost of reading polygons and attributes', time.time() - time0)

        # get grid ids based on input extent
        grid_base_name = os.path.splitext(
            os.path.basename(extent_shp_or_ids_txt))[0]
        grid_polys, grid_ids = get_grid_20(extent_shp_or_ids_txt,
                                           all_grid_polys, all_ids)

        # check existing RTS selection results
        grid_rts_shps, grid_id_no_rts_shp = get_existing_select_grid_rts(
            grid_rts_shp_dir, grid_base_name, grid_ids)

        if len(grid_id_no_rts_shp) > 0:
            # refine grid_polys
            if len(grid_ids) > len(grid_id_no_rts_shp):
                id_index = [grid_ids.index(id) for id in grid_id_no_rts_shp]
                grid_polys = [grid_polys[idx] for idx in id_index]
            #
            rts_shp_folders = select_rts_map_demDiff_headwall_grids(
                yolo_result_dir,
                dem_subsidence_dir,
                grid_headwall_dir,
                grid_polys,
                grid_id_no_rts_shp,
                grid_base_name,
                process_num=process_num)
    else:
        # process the selection for two input shapefiles
        yolo_box_shp = args[0]
        dem_subsidence_shp = args[1]
        print('polygon group 1:', yolo_box_shp)
        print('polygon group 2:', dem_subsidence_shp)

        if options.save_path is not None:
            save_path = options.save_path
        else:
            save_path = io_function.get_name_by_adding_tail(
                yolo_box_shp, 'select')

        select_polygons_overlap_others_in_group2(yolo_box_shp,
                                                 dem_subsidence_shp,
                                                 save_path,
                                                 buffer_size=buffer_size,
                                                 process_num=process_num)

    pass
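
In the two-shapefile branch, select_polygons_overlap_others_in_group2 keeps the group-1 polygons that overlap the (buffered) group-2 polygons. A rough GeoPandas equivalent of that idea, for illustration only (not the function's actual implementation; paths and the buffer value are hypothetical):

import geopandas as gpd

boxes = gpd.read_file('yolo_boxes.shp')           # group 1 (hypothetical path)
subsidence = gpd.read_file('dem_subsidence.shp')  # group 2 (hypothetical path)

# buffer group 2 and merge it, then keep group-1 polygons touching the result
buffered_union = subsidence.geometry.buffer(10.0).unary_union
selected = boxes[boxes.geometry.intersects(buffered_union)]
selected.to_file('yolo_boxes_select.shp')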