def update_complete_grid_list(grid_ids, task_list):
    # check, based on several criteria, whether the results exist, then update grid_complete_list_txt
    completed_id_list = []
    if os.path.isfile(grid_complete_list_txt):
        completed_id_list = [
            int(item)
            for item in io_function.read_list_from_txt(grid_complete_list_txt)
        ]
    n_task = len(task_list)
    if n_task < 1:
        raise ValueError('No task in %s' % str(task_list))

    for g_id in grid_ids:
        if g_id in completed_id_list:
            continue
        # check if it has been completed based on multiple criteria
        complete_count = 0
        if 'dem_diff' in task_list and b_exist_gid_dem_diff(g_id):
            complete_count += 1
        if 'hillshade_headwall_line' in task_list and b_exist_dem_hillshade_newest_HWLine_grid(
                g_id):
            complete_count += 1
        if 'dem_headwall_grid' in task_list and b_exist_grid_headwall_shp(
                g_id):
            complete_count += 1
        if 'segment' in task_list and b_exist_grid_dem_subsidence(g_id):
            complete_count += 1
        # more task results (e.g., dem_headwall) could be checked here

        if complete_count == n_task:
            completed_id_list.append(g_id)

    # save the txt
    completed_id_list = [str(item) for item in completed_id_list]
    io_function.save_list_to_txt(grid_complete_list_txt, completed_id_list)
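# Note: these examples rely on two small helpers from the project's io_function
# module. Below is a minimal stdlib-only sketch of their assumed behavior (one
# item per line, stripped of newlines) -- not the project's actual implementation:
def save_list_to_txt_sketch(file_name, save_list):
    with open(file_name, 'w') as f:
        for item in save_list:
            f.write(str(item) + '\n')
    return True

def read_list_from_txt_sketch(file_name):
    with open(file_name, 'r') as f:
        return [line.strip() for line in f if line.strip() != '']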
def generate_image_CUT(python_path, generate_script, gan_para_file, gpu_ids,
                       image_list, save_folder):

    if os.path.isfile('generate.txt_done'):
        basic.outputlogMessage(
            'generating new images using GAN in %s completed previously; remove generate.txt_done to re-run if necessary'
            % os.getcwd())
        return True

    time0 = time.time()
    generate_tile_width = parameters.get_digit_parameters(
        gan_para_file, 'generate_tile_width', 'int')
    generate_tile_height = parameters.get_digit_parameters(
        gan_para_file, 'generate_tile_height', 'int')
    generate_overlay_x = parameters.get_digit_parameters(
        gan_para_file, 'generate_overlay_x', 'int')
    generate_overlay_y = parameters.get_digit_parameters(
        gan_para_file, 'generate_overlay_y', 'int')

    folder = os.path.basename(os.getcwd())
    img_list_txt = 'image_to_generate_list.txt'
    io_function.save_list_to_txt(img_list_txt, image_list)

    command_string = python_path + ' '  +  generate_script \
                + ' --dataset_mode '+'satelliteimage' \
                + ' --model '+ 'generate' \
                + ' --image_A_dir_txt ' + img_list_txt \
                + ' --tile_width ' + str(generate_tile_width) \
                + ' --tile_height ' + str(generate_tile_height) \
                + ' --overlay_x ' + str(generate_overlay_x) \
                + ' --overlay_y ' + str(generate_overlay_y)  \
                + ' --name ' + folder  \
                + ' --results_dir ' + save_folder  \
                + ' --gpu_ids ' + ','.join([str(item) for item in gpu_ids])

    gen_max_dataset_size = parameters.get_digit_parameters_None_if_absence(
        gan_para_file, 'gen_max_dataset_size', 'int')
    if gen_max_dataset_size is not None:
        command_string += ' --max_dataset_size ' + str(gen_max_dataset_size)

    # if it's cycleGAN, need to assign A generator
    gan_model = parameters.get_string_parameters(gan_para_file, 'gan_model')
    if gan_model == 'cycle_gan':
        command_string += ' --model_suffix _A '  # from A to B

    # status, result = basic.exec_command_string(command_string)  # this waits until the command finishes
    # os.system(command_string + "&")  # no way to know when it finishes
    res = os.system(command_string)  # this works
    # print('command_string deeplab_inf_script: res', res)
    if res != 0:
        sys.exit(1)

    duration = time.time() - time0
    os.system(
        'echo "$(date): time cost of generate images using a GAN : %.2f seconds">>"time_cost.txt"'
        % (duration))
    # write a file to indicate that the process has completed.
    os.system('echo done > generate.txt_done')

    return True
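# Note: a hedged alternative sketch: passing the arguments as a list through
# subprocess avoids shell-quoting pitfalls and raises on a non-zero exit code.
# The flag names below are taken from command_string above; the function name
# run_generate_sketch is hypothetical.
import subprocess

def run_generate_sketch(python_path, generate_script, img_list_txt, tile_w,
                        tile_h, overlay_x, overlay_y, name, results_dir, gpu_ids):
    cmd = [python_path, generate_script,
           '--dataset_mode', 'satelliteimage',
           '--model', 'generate',
           '--image_A_dir_txt', img_list_txt,
           '--tile_width', str(tile_w),
           '--tile_height', str(tile_h),
           '--overlay_x', str(overlay_x),
           '--overlay_y', str(overlay_y),
           '--name', name,
           '--results_dir', results_dir,
           '--gpu_ids', ','.join(str(g) for g in gpu_ids)]
    return subprocess.run(cmd, check=True)  # raises CalledProcessError on failure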
Example #3
def split_an_image(para_file, image_path, save_dir, patch_w, patch_h,
                   overlay_x, overlay_y):

    split_format = parameters.get_string_parameters(para_file,
                                                    'split_image_format')
    out_format = 'PNG'  # default is PNG
    if split_format == '.tif':
        out_format = 'GTIFF'
    elif split_format == '.jpg':
        out_format = 'JPEG'
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    split_image.split_image(image_path,
                            save_dir,
                            patch_w,
                            patch_h,
                            overlay_x,
                            overlay_y,
                            out_format,
                            pre_name=None,
                            process_num=8)
    # get list
    patch_list = io_function.get_file_list_by_ext(split_format,
                                                  save_dir,
                                                  bsub_folder=False)
    if len(patch_list) < 1:
        print('Warning, no images in %s' % save_dir)
        return None
    list_txt_path = save_dir + '_list.txt'
    io_function.save_list_to_txt(list_txt_path, patch_list)
    return list_txt_path
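# A hedged usage sketch for split_an_image ('main_para.ini' and 'scene.tif'
# are placeholder names, not files from the original project):
list_txt = split_an_image('main_para.ini', 'scene.tif', 'scene_patches',
                          480, 480, 160, 160)
if list_txt is not None:
    print('patch list saved to', list_txt)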
Example #4
def main():
    ntf_list = io_function.get_file_list_by_ext('.ntf',
                                                os.path.join(dir, 'DATA'),
                                                bsub_folder=True)
    io_function.save_list_to_txt('ntf_list.txt', ntf_list)
    dem_list = io_function.get_file_list_by_ext('.tif',
                                                os.path.join(dir, 'PRODUCTS'),
                                                bsub_folder=True)
    dem_list = [
        item for item in dem_list
        if item.endswith('_dem.tif') and 'strips' in item
    ]
    io_function.save_list_to_txt('dem_list.txt', dem_list)

    for idx, ntf in enumerate(ntf_list):
        print(' (%d/%d) working on ' % (idx + 1, len(ntf_list)), ntf)
        name = os.path.basename(ntf)
        scene_id = name.split('_')[2]
        print('scene_id:', scene_id)

        dem_path = None
        for dem_tif in dem_list:
            if scene_id in os.path.basename(dem_tif):
                dem_path = dem_tif
                break
        if dem_path is None:
            raise ValueError('Cannot find the corresponding DEM')

        output = os.path.splitext(name)[0] + '_ortho_sub.tif'
        ortho_rectified_gdalwarp(ntf, output, dem_path)
        # break

Example #5
def main(options, args):
    input = args[0]

    if input.endswith('.txt'):
        slope_tifs = io_function.read_list_from_txt(input)
    elif os.path.isdir(input):
        slope_tifs = io_function.get_file_list_by_ext('.tif', input, bsub_folder=True)
    else:
        slope_tifs = [input]
    process_num = options.process_num

    working_dir = './'
    save_dir = dem_headwall_shp_dir
    if os.path.isdir(working_dir) is False:
        io_function.mkdir(working_dir)
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    failed_tifs = []

    min_slope = options.min_slope
    min_size = options.min_area
    max_size = options.max_area
    max_axis_width = options.max_axis_width
    max_box_WH = options.max_box_WH
    for idx, slope in enumerate(slope_tifs):
        if extract_headwall_from_slope(idx, len(slope_tifs), slope, working_dir,
                                       save_dir, min_slope, min_size, max_size,
                                       max_axis_width, max_box_WH,
                                       process_num) is False:
            failed_tifs.append(slope)

    io_function.save_list_to_txt('extract_headwall_failed_tifs.txt', failed_tifs)
Example #6
def mosaic_dem_list_gdal_merge(key, dem_list, save_tif_dir, save_source):
    # use gdal_merge.py to create a mosaic; in areas of overlap, the last image is copied over earlier ones

    save_mosaic = os.path.join(save_tif_dir, key + '.tif')
    b_save_mosaic = io_function.is_file_exist_subfolder(save_tif_dir, key + '.tif')
    if b_save_mosaic is not False:
        basic.outputlogMessage('warning, mosaic file: %s exists, skip' % save_mosaic)
        return save_mosaic

    # save the source file for producing the mosaic
    if save_source:
        save_mosaic_source_txt = os.path.join(save_tif_dir, key + '_src.txt')
        io_function.save_list_to_txt(save_mosaic_source_txt, dem_list)

    # if only one dem, then copy it if it's not VRT format
    if len(dem_list) == 1:
        if raster_io.get_driver_format(dem_list[0]) != 'VRT':
            io_function.copy_file_to_dst(dem_list[0], save_mosaic)
            return save_mosaic

    nodata = raster_io.get_nodata(dem_list[0])

    # create the mosaic; this can also handle a single input file, but is slow
    result = RSImageProcess.mosaics_images(dem_list,save_mosaic,nodata=nodata,
                                           compress='lzw', tiled='yes', bigtiff='if_safer')

    if result is False:
        sys.exit(1)
        # return False
    return save_mosaic
Example #7
def merge_grid_ids_txt(task, fail_id_txt_list):
    id_list = []
    for txt in fail_id_txt_list:
        id_list.extend(io_function.read_list_from_txt(txt))
    id_list = list(set(id_list))  # remove duplicates
    save_path = '%s_fail_grid_ids.txt' % task
    io_function.save_list_to_txt(save_path, id_list)
    return save_path
def save_grid_ids_need_to_process(grid_ids,
                                  ignore_ids=None,
                                  save_path='grid_ids_to_process.txt'):
    '''save the grid ids that still need processing to a txt file; return the number of grids to process'''
    if ignore_ids is None:
        id_list = get_complete_ignore_grid_ids()
    else:
        id_list = ignore_ids
    ids_need_to_proc = [str(id) for id in grid_ids if id not in id_list]
    io_function.save_list_to_txt(save_path, ids_need_to_proc)
    return len(ids_need_to_proc)
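# Note: "id not in id_list" above is a linear scan per grid id; with large
# ignore lists, converting to a set first makes the whole filter effectively
# O(n). A small equivalent sketch (filter_ids_sketch is hypothetical):
def filter_ids_sketch(grid_ids, ignore_ids):
    ignore_set = set(ignore_ids)  # O(1) membership tests
    return [str(g) for g in grid_ids if g not in ignore_set]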
Example #9
def main():

    file_list = io_function.get_file_list_by_pattern(arcticDEM_reg_tif_dir,
                                                     '*_dem_reg.tif')
    print('Get %d dem_reg.tif from %s' %
          (len(file_list), arcticDEM_reg_tif_dir))

    year_dates = [
        timeTools.get_yeardate_yyyymmdd(os.path.basename(item),
                                        pattern='[0-9]{8}_')
        for item in file_list
    ]
    month_list = [item.month for item in year_dates]
    value_list = month_list

    # save unique date to txt file
    dates_unique = set(year_dates)
    dates_unique = sorted(dates_unique)
    dates_unique_str = [
        timeTools.date2str(item, '%Y-%m-%d') for item in dates_unique
    ]
    io_function.save_list_to_txt('dates_unique.txt', dates_unique_str)

    # plot a histogram
    # bin_count = 12
    bins = np.arange(0, 12, 1)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
    n, bins, patches = ax.hist(value_list,
                               bins=bins,
                               alpha=0.75,
                               ec="black",
                               linewidth=1.5,
                               color='grey',
                               hatch='',
                               rwidth=1)  # density = True, # label=labels,

    # ax.legend(prop={'size': 12})
    plt.xticks(bins)
    ax.tick_params(axis='both',
                   which='both',
                   direction='out',
                   length=7,
                   labelsize=20)  # ,width=50 #,
    # if xlabelrotation is not None:
    #     ax.tick_params(axis='x', labelrotation=90)

    # if ylim is not None:
    #     ax.set_ylim(ylim)

    plt.gcf().subplots_adjust(bottom=0.15)
    # plt.grid(True)
    plt.savefig('ArcticDEM_strip_date_hist.jpg')
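# Note on the bins: months run 1-12, but np.arange(0, 12, 1) yields edges
# 0..11, so December falls outside the last bin. If month-centered bars are
# wanted, half-integer edges are a common fix (a sketch, not the original
# script's choice; the month values are made up):
import numpy as np
import matplotlib.pyplot as plt

months = [1, 3, 3, 7, 12, 12]      # example month values
bins = np.arange(0.5, 13.5, 1.0)   # edges 0.5..12.5: one bin per month, 12 included
fig, ax = plt.subplots(figsize=(8, 8))
ax.hist(months, bins=bins, ec='black', color='grey')
plt.xticks(range(1, 13))
plt.savefig('month_hist_example.jpg')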
Example #10
def main(options, args):

    # process_num = multiprocessing.cpu_count()
    process_num = options.process_num
    data_folder = arcticDEM_reg_tif_dir
    if len(args) > 0:
        data_folder = args[0]

    tifs = io_function.get_file_list_by_pattern(
        data_folder, '*.tif')  # check all tifs, not only '*_dem_reg.tif'
    save_invalid_txt_path = os.path.basename(data_folder) + '_invalid_list.txt'
    save_good_txt_path = os.path.basename(data_folder) + '_good_list.txt'
    tif_count = len(tifs)

    basic.outputlogMessage('get %d tif files in %s' % (tif_count, data_folder))

    good_tifs = []
    if os.path.isfile(save_good_txt_path):
        good_tifs.extend(io_function.read_list_from_txt(save_good_txt_path))
    invalid_tif = []

    # remove good ones from the list
    if len(good_tifs) > 0:
        tifs = [
            item for item in tifs if os.path.basename(item) not in good_tifs
        ]

    if process_num == 1:
        for idx, tif in enumerate(tifs):
            if check_one_tif(idx, tif_count, tif, good_tifs):
                good_tifs.append(os.path.basename(tif))
            else:
                invalid_tif.append(os.path.basename(tif))
    else:
        thread_pool = Pool(process_num)  # multiple processes
        parameters_list = [(idx, tif_count, tif, good_tifs)
                           for idx, tif in enumerate(tifs)]
        results = thread_pool.starmap(check_one_tif,
                                      parameters_list)  # starmap needs Python 3
        for tif, res in zip(tifs, results):
            if res:
                good_tifs.append(os.path.basename(tif))
            else:
                invalid_tif.append(os.path.basename(tif))
        thread_pool.close()

    io_function.save_list_to_txt(save_invalid_txt_path, invalid_tif)
    io_function.save_list_to_txt(save_good_txt_path, good_tifs)
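# Note: the Pool above is closed but never joined, and a worker exception
# would leak it. Using the pool as a context manager handles both. A minimal
# sketch of the same fan-out pattern (check_one_sketch is a hypothetical
# stand-in for check_one_tif):
from multiprocessing import Pool

def check_one_sketch(idx, total, path):
    return True  # stand-in check: treat every file as good

def classify_sketch(paths, process_num):
    args = [(idx, len(paths), p) for idx, p in enumerate(paths)]
    with Pool(process_num) as pool:              # close/join handled on exit
        results = pool.starmap(check_one_sketch, args)
    good = [p for p, ok in zip(paths, results) if ok]
    bad = [p for p, ok in zip(paths, results) if not ok]
    return good, bad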
def save_selected_girds_and_ids(selected_gird_id_list, select_grid_polys, proj,
                                save_path):
    # save to a shapefile for downloading and processing
    # cast numpy.uint16 to int, to avoid values becoming negative when saved to a shapefile
    selected_gird_id_list = [int(item) for item in selected_gird_id_list]
    save_pd = pd.DataFrame({
        'grid_id': selected_gird_id_list,
        'Polygon': select_grid_polys
    })
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', proj, save_path)
    basic.outputlogMessage('saved %d grids to %s' %
                           (len(select_grid_polys), save_path))
    # save the ids to txt
    save_id_txt = os.path.splitext(save_path)[0] + '_grid_ids.txt'
    selected_grid_ids_str = [str(item) for item in selected_gird_id_list]
    io_function.save_list_to_txt(save_id_txt, selected_grid_ids_str)
Example #12
def main(options, args):
    # process_num = multiprocessing.cpu_count()
    process_num = options.process_num
    if len(args) > 0:
        data_folder = args[0]
    else:
        data_folder = grid_dem_headwall_shp_dir

    basic.outputlogMessage('check shapefiles in %s, with %d processes' %
                           (data_folder, process_num))

    vector_files = io_function.get_file_list_by_ext('.shp',
                                                    data_folder,
                                                    bsub_folder=True)
    save_invalid_txt_path = os.path.basename(data_folder) + '_incomplete_list.txt'
    save_good_txt_path = os.path.basename(data_folder) + '_good_list.txt'
    file_count = len(vector_files)
    basic.outputlogMessage('The number of vector files: %d' % file_count)

    good_files = []
    if os.path.isfile(save_good_txt_path):
        good_files.extend(io_function.read_list_from_txt(save_good_txt_path))
    incomplete_files = []

    # remove good ones from the list
    if len(good_files) > 0:
        vector_files = [
            item for item in vector_files
            if os.path.basename(item) not in good_files
        ]

    if process_num == 1:
        # tifs = io_function.get_file_list_by_ext('.tif',arcticDEM_reg_tif_dir, bsub_folder=False)
        for idx, tif in enumerate(vector_files):
            if check_one_vector_file(idx, file_count, tif, good_files):
                good_files.append(os.path.basename(tif))
            else:
                incomplete_files.append(os.path.basename(tif))
    else:
        thread_pool = Pool(process_num)  # multiple processes
        parameters_list = [(idx, file_count, tif, good_files)
                           for idx, tif in enumerate(vector_files)]
        results = thread_pool.starmap(check_one_vector_file,
                                      parameters_list)  # starmap needs Python 3
        for tif, res in zip(vector_files, results):
            if res:
                good_files.append(os.path.basename(tif))
            else:
                incomplete_files.append(os.path.basename(tif))
        thread_pool.close()

    io_function.save_list_to_txt(save_invalid_txt_path, incomplete_files)
    io_function.save_list_to_txt(save_good_txt_path, good_files)
Example #13
def main():
    basic.setlogfile('log_convert_dem_diff_to8bit.txt')
    if os.path.isdir(grid_dem_diffs_8bit_dir) is False:
        io_function.mkdir(grid_dem_diffs_8bit_dir)

    dem_diff_list = io_function.get_file_list_by_pattern(grid_dem_diffs_dir,
                                                         '*DEM_diff_grid*.tif')
    count = len(dem_diff_list)
    failed_tifs = []
    for idx, tif in enumerate(dem_diff_list):
        print('%d/%d convert %s to 8 bit' % (idx + 1, count, tif))
        tif_8bit = io_function.get_name_by_adding_tail(tif, '8bit')
        output = os.path.join(grid_dem_diffs_8bit_dir, os.path.basename(tif_8bit))
        if dem_tif_to_8bit(tif, output) is False:
            failed_tifs.append(tif)

    if len(failed_tifs) > 0:
        io_function.save_list_to_txt('failed_dem_diff_to8bit.txt', failed_tifs)
def submit_hillshade_newest_headwall_line_grid_job(ids_list, idx,
                                                   grid_base_name,
                                                   max_job_count):

    wait_if_reach_max_jobs(max_job_count, 'dLi')  # draw Line on hillshade

    job_name = 'dLi%d' % idx
    check_length_jobname(job_name)
    work_dir = working_dir_string(idx,
                                  'hillshade_newest_headwall_line_',
                                  root=root_dir)
    if os.path.isdir(work_dir) is False:
        io_function.mkdir(work_dir)
        os.chdir(work_dir)

        ids_list = [str(item) for item in ids_list]
        io_function.save_list_to_txt(grid_base_name + '.txt', ids_list)

        # prepare job
        sh_list = [
            'hillshade_headwall_line_grid.sh',
            'job_hillshade_headwall_line_grid.sh'
        ]
        copy_curc_job_files(jobsh_dir, work_dir, sh_list)
        slurm_utility.modify_slurm_job_sh(
            'job_hillshade_headwall_line_grid.sh', 'job-name', job_name)
    else:
        os.chdir(work_dir)
        submit_job_names = slurm_utility.get_submited_job_names(curc_username)
        if job_name in submit_job_names:
            print(
                'The folder: %s already exists and the job has been submitted, skip submitting a new job'
                % work_dir)
            return

        # job is completed
        if os.path.isfile('done.txt'):
            print('The job in the folder: %s is Done' % work_dir)
            return

    # submit the job
    # sometimes, submitting a job ends with "singularity: command not found" and exits (weird); if so, try submitting the job on an scompile node
    submit_job_curc_or_run_script_local('job_hillshade_headwall_line_grid.sh',
                                        'hillshade_headwall_line_grid.sh')

    os.chdir(curr_dir_before_start)
def make_note_all_task_done(extent_shp, remote_node):
    if os.path.isdir(grid_ids_txt_dir) is False:
        io_function.mkdir(grid_ids_txt_dir)

    shp_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(
        extent_shp)

    # shp_grid_id_txt should be in the current folder
    if os.path.isfile(log_grid_ids_txt) is False:
        io_function.copy_file_to_dst(shp_grid_id_txt, log_grid_ids_txt)

    if os.path.isfile(log_grid_ids_txt_done) is False:
        io_function.save_list_to_txt(log_grid_ids_txt_done, ['Done'])
        # copy to the remote (CURC) machine
        r_grid_ids_txt_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/grid_ids_txt'
        scp_communicate.copy_file_folder_to_remote_machine(
            remote_node, r_grid_ids_txt_dir, log_grid_ids_txt_done)
Example #16
def organize_files(sub_img_dirs, save_dir):
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    # get all png files
    png_list = []
    for img_dir in sub_img_dirs:
        pngs = io_function.get_file_list_by_pattern(img_dir, '*.png')
        png_list.extend(pngs)

    image_name_list = []
    images_dir = os.path.join(save_dir, 'images')
    imageBound_dir = os.path.join(save_dir, 'imageBound')
    objectPolygons_dir = os.path.join(save_dir, 'objectPolygons')
    io_function.mkdir(images_dir)
    io_function.mkdir(imageBound_dir)
    io_function.mkdir(objectPolygons_dir)

    for idx, png in enumerate(png_list):
        basename = io_function.get_name_no_ext(png)
        new_name = 'img' + str(idx + 1).zfill(6) + '_' + basename
        image_name_list.append(new_name)

        io_function.copy_file_to_dst(
            png, os.path.join(images_dir, new_name + '.png'))
        png_xml = png + '.aux.xml'
        if os.path.isfile(png_xml):
            io_function.copy_file_to_dst(
                png_xml, os.path.join(images_dir, new_name + '.png.aux.xml'))

        bound_path = png.replace('.png', '_bound.geojson')
        io_function.copy_file_to_dst(
            bound_path,
            os.path.join(imageBound_dir, new_name + '_bound.geojson'))

        digit_str = re.findall(r'_\d+', basename)
        id_str = digit_str[0][1:]
        object_path = os.path.join(os.path.dirname(png),
                                   'id_%s.geojson' % id_str)
        io_function.copy_file_to_dst(
            object_path, os.path.join(objectPolygons_dir,
                                      new_name + '.geojson'))

    txt_path = os.path.join(save_dir, 'imageList.txt')
    io_function.save_list_to_txt(txt_path, image_name_list)
Example #17
def get_augment_options():
    from itertools import combinations

    # test_id = 0
    img_aug_options = []
    for count in range(1, 9):
        comb = combinations([
            'flip', 'blur', 'crop', 'scale', 'rotate', 'bright', 'contrast',
            'noise'
        ], count)
        for img_aug in comb:
            # spaces are not allowed in img_aug_str
            img_aug_str = ','.join(img_aug)
            img_aug_options.append(img_aug_str)

    io_function.save_list_to_txt('img_aug_str.txt', img_aug_options)

    return img_aug_options
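# For reference, the loop enumerates every non-empty subset of the eight
# augmentations, so the saved list has 2**8 - 1 = 255 entries. A quick check:
from itertools import combinations

ops = ['flip', 'blur', 'crop', 'scale', 'rotate', 'bright', 'contrast', 'noise']
total = sum(1 for count in range(1, 9) for _ in combinations(ops, count))
assert total == 2**8 - 1 == 255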
Example #18
def save_id_grid_no_dem(grid_id):
    if os.path.isdir(process_log_dir) is False:
        io_function.mkdir(process_log_dir)
    # update the grid_no_dem_txt file
    id_list = []
    if os.path.isfile(grid_no_dem_txt):
        id_list = io_function.read_list_from_txt(
            grid_no_dem_txt)  # no need to convert to int
    id_str = str(grid_id)
    if id_str in id_list:
        return True
    else:
        # save
        id_list.append(str(grid_id))
        io_function.save_list_to_txt(grid_no_dem_txt, id_list)
        basic.outputlogMessage('Saved grid id (%d) to %s' %
                               (grid_id, grid_no_dem_txt))
        return True
def submit_extract_headwall_job(slope_tifs, idx, max_job_count):

    wait_if_reach_max_jobs(max_job_count, 'HW')

    job_name = 'HW%d' % idx
    check_length_jobname(job_name)
    work_dir = working_dir_string(idx, 'extract_headwall_', root=root_dir)
    if os.path.isdir(work_dir) is False:
        io_function.mkdir(work_dir)
        os.chdir(work_dir)

        io_function.save_list_to_txt('slope_tif_list.txt', slope_tifs)

        # run segmentation
        sh_list = ['job_healwall.sh', 'extract_headwall_from_slope.sh']
        copy_curc_job_files(jobsh_dir, work_dir, sh_list)
        slurm_utility.modify_slurm_job_sh('job_healwall.sh', 'job-name',
                                          job_name)

    else:
        os.chdir(work_dir)

        # job is completed
        if os.path.isfile('done.txt'):
            print('The job in the folder: %s is Done' % work_dir)
            return

        submit_job_names = slurm_utility.get_submited_job_names(curc_username)
        if job_name in submit_job_names:
            print(
                'The folder: %s already exists and the job has been submitted, skip submitting a new job'
                % work_dir)
            return

    # submit the job
    # sometimes, submitting a job ends with "singularity: command not found" and exits (weird); if so, try submitting the job on an scompile node
    submit_job_curc_or_run_script_local('job_healwall.sh',
                                        'extract_headwall_from_slope.sh')

    os.chdir(curr_dir_before_start)

    return
def get_grid_ids_extent(extent_shp):
    if 'ArcticDEM_grid_20km' in os.path.basename(extent_shp):
        print(
            'input %s looks like a grid file, read grid polygons and ids from it directly'
            % extent_shp)
        grid_polys, grid_ids = vector_gpd.read_polygons_attributes_list(
            extent_shp, 'grid_id')
        file_name_base = os.path.splitext(os.path.basename(extent_shp))[0]
        shp_corresponding_grid_ids_txt = file_name_base + '_grid_ids.txt'
        io_function.save_list_to_txt(shp_corresponding_grid_ids_txt,
                                     [str(item) for item in grid_ids])
    else:
        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
            grid_20_shp, 'id')  # in this file, it's "id", not "grid_id"
        print('time cost of read polygons and attributes', time.time() - time0)
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

    return grid_ids
def save_list_no_need_dem_files(file_name, file_list):
    if len(file_list) < 1:
        return True
    # update the file list
    save_list = []
    if os.path.isfile(file_name):
        save_list = io_function.read_list_from_txt(file_name)
    for item in file_list:
        if item in save_list:
            continue
        save_list.append(item)
    return io_function.save_list_to_txt(file_name, save_list)
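# Note: the append-if-absent loop above preserves order but is quadratic in
# the list length; dict.fromkeys gives the same order-preserving
# de-duplication in one pass (a sketch; merge_unique_sketch is hypothetical):
def merge_unique_sketch(existing, new_items):
    # dict preserves insertion order (Python 3.7+), so earlier entries win
    return list(dict.fromkeys(list(existing) + list(new_items)))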
def sync_log_files(process_node, r_log_dir, process_log_dir):
    # copy complete id list, dem info, grid_no_dem_ids.txt to remote machine
    files_to_processNode = [
        'strip_dem_cover_grids.txt', 'tile_dem_cover_grids.txt',
        'grid_complete_ids.txt', 'grid_no_dem_ids.txt'
    ]
    for file in files_to_processNode:
        scp_communicate.copy_file_folder_to_remote_machine(
            process_node, os.path.join(r_log_dir, file),
            os.path.join(process_log_dir, file))

    files_from_processNode = [
        'grid_dem_diff_less2dem_ids.txt', 'grid_no_valid_dem_ids.txt',
        'grid_no_headwall_ids.txt', 'grid_no_subscidence_poly_ids.txt',
        'grid_no_watermask_ids.txt'
    ]

    remote_name = process_node[1:].replace('_host',
                                           '')  # change $curc_host to curc
    for file in files_from_processNode:
        # copy the file, do not overwrite the local file
        remote_file = os.path.join(
            process_log_dir,
            io_function.get_name_by_adding_tail(file, remote_name))
        scp_communicate.copy_file_folder_from_remote_machine(
            process_node, os.path.join(r_log_dir, file), remote_file)
        # if there are new ids, then merge them into "file"
        local_file = os.path.join(process_log_dir, file)
        remote_ids = io_function.read_list_from_txt(
            remote_file) if os.path.isfile(remote_file) else []  # no need to convert to int
        local_ids = io_function.read_list_from_txt(
            local_file) if os.path.isfile(local_file) else []
        new_ids = [id for id in remote_ids if id not in local_ids]
        if len(new_ids) < 1:
            continue
        else:
            local_ids.extend(new_ids)
            io_function.save_list_to_txt(local_file, local_ids)
Example #23
def produce_corresponding_grid_ids_txt(extent_shp, local_grid_id_txt,
                                       log_grid_ids_txt):

    # if it is in the log dir but not in the current dir, then copy it
    if os.path.isfile(
            log_grid_ids_txt) and os.path.isfile(local_grid_id_txt) is False:
        io_function.copy_file_to_dst(log_grid_ids_txt,
                                     local_grid_id_txt,
                                     overwrite=False)
        return True

    # if not in the local dir, then generate it
    if os.path.isfile(local_grid_id_txt) is False:
        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
            grid_20_shp, 'id')
        print('time cost of read polygons and attributes', time.time() - time0)

        # this will create local_grid_id_txt
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

        # modify local_grid_id_txt by excluding grid_id already in adjacent extent
        other_grid_ids = read_grid_ids_from_other_extent()
        grid_ids = [id for id in grid_ids if id not in other_grid_ids]

        # overwrite the local_grid_id_txt file
        grid_ids_str = [str(item) for item in grid_ids]
        io_function.copy_file_to_dst(local_grid_id_txt,
                                     io_function.get_name_by_adding_tail(
                                         local_grid_id_txt,
                                         'noRMadj'))  # save a copy
        io_function.save_list_to_txt(local_grid_id_txt, grid_ids_str)

        # copy to log dir
        io_function.copy_file_to_dst(local_grid_id_txt, log_grid_ids_txt)

    return True
Example #24
def mosaic_dem_list(key, dem_list, save_tif_dir, resample_method, save_source,
                    o_format):

    save_mosaic = os.path.join(save_tif_dir, key + '.tif')
    # check file existence
    # if os.path.isfile(save_mosaic):
    b_save_mosaic = io_function.is_file_exist_subfolder(
        save_tif_dir, key + '.tif')
    if b_save_mosaic is not False:
        basic.outputlogMessage('warning, mosaic file: %s exists, skip' %
                               b_save_mosaic)
        return b_save_mosaic
        # mosaic_list.append(b_save_mosaic)
        # continue
    # save the source file for producing the mosaic
    if save_source:
        save_mosaic_source_txt = os.path.join(save_tif_dir, key + '_src.txt')
        io_function.save_list_to_txt(save_mosaic_source_txt, dem_list)

    # if len(demTif_groups[key]) == 1:
    #     io_function.copy_file_to_dst(demTif_groups[key][0],save_mosaic)
    # else:
    #     # RSImageProcess.mosaics_images(dem_groups[key],save_mosaic)
    #     RSImageProcess.mosaic_crop_images_gdalwarp(demTif_groups[key],save_mosaic,resampling_method=resample_method,o_format=o_format)

    # create the mosaic; this can also handle a single input file
    result = RSImageProcess.mosaic_crop_images_gdalwarp(
        dem_list,
        save_mosaic,
        resampling_method=resample_method,
        o_format=o_format,
        compress='lzw',
        tiled='yes',
        bigtiff='if_safer')
    if result is False:
        return False
    return save_mosaic
Example #25
def mosaic_dem_list(key, dem_list, save_tif_dir, resample_method, save_source,
                    o_format, thread_num=1):

    # print('\n\n os.fork \n\n', os.fork())
    # if os.fork()==0:
    #     proc_id = multiprocessing.current_process().pid
    #     basic.setlogfile('log_file_pid_%d.txt'%proc_id)

    save_mosaic = os.path.join(save_tif_dir, key + '.tif')
    # check file existence
    # if os.path.isfile(save_mosaic):
    b_save_mosaic = io_function.is_file_exist_subfolder(save_tif_dir, key + '.tif')
    if b_save_mosaic is not False:
        basic.outputlogMessage('warning, mosaic file: %s exists, skip' % save_mosaic)
        return save_mosaic
        # mosaic_list.append(b_save_mosaic)
        # continue
    # save the source file for producing the mosaic
    if save_source:
        save_mosaic_source_txt = os.path.join(save_tif_dir, key + '_src.txt')
        io_function.save_list_to_txt(save_mosaic_source_txt, dem_list)

    # if only one dem, then copy it if it's not VRT format
    if len(dem_list) == 1:
        if raster_io.get_driver_format(dem_list[0]) != 'VRT':
            io_function.copy_file_to_dst(dem_list[0], save_mosaic)
            return save_mosaic

    # create the mosaic; this can also handle a single input file, but is slow
    result = RSImageProcess.mosaic_crop_images_gdalwarp(dem_list, save_mosaic, resampling_method=resample_method,
                                               o_format=o_format,
                                               compress='lzw', tiled='yes', bigtiff='if_safer',thread_num=thread_num)
    if result is False:
        sys.exit(1)
        # return False
    return save_mosaic
def main():

    # get shapefile list
    headwall_shp_list = io_function.get_file_list_by_ext('.shp',
                                                         dem_headwall_shp_dir,
                                                         bsub_folder=False)
    if len(headwall_shp_list) < 1:
        raise ValueError('NO shapefile in %s' % dem_headwall_shp_dir)

    failed_shp = []
    out_dir = dem_hillshade_subImages_headwall
    if len(sys.argv) == 2:
        # change the output dir
        out_dir = sys.argv[1]

    for idx, shp in enumerate(headwall_shp_list):
        print('(%d/%d) extract sub images for %s' %
              (idx + 1, len(headwall_shp_list), shp))

        if set_image_dir_patter_description(dem_hillshade_dir, shp) is False:
            continue

        save_dir = os.path.join(out_dir,
                                os.path.splitext(os.path.basename(shp))[0])
        if os.path.isdir(save_dir):
            print('Warning, skipping because sub-images for %s may already exist' % shp)
            continue
        io_function.mkdir(save_dir)

        res = os.system(extract_py +
                        ' -p para_file_subImage.ini -o %s ' % save_dir + shp)
        if res != 0:
            failed_shp.append(shp)

    if len(failed_shp) > 0:
        io_function.save_list_to_txt('failed_shp.txt', failed_shp)
def produce_products_dem_subsidence(b_remove_job_folder=True):
    # run segment jobs in local workstations.

    task = 'segment'
    max_list_count = 20
    if 'donostia' in machine_name:
        max_list_count = 8  # donostia is really slow, so assign fewer tasks to it
    job_list_pre = 'job_seg_dem_diff_list_'

    if os.path.isdir(dem_common.process_log_dir) is False:
        io_function.mkdir(dem_common.process_log_dir)

    dem_list_txt = os.path.join(dem_common.process_log_dir,
                                job_list_pre + machine_name + '.txt')

    # when submitting segmentation of dem_diff, there is no need for ext_shp
    ext_shp = "monitor_fail_segment_jobs"

    while True:
        dem_diff_list = get_dem_diff_list_to_seg()

        # only handle files that are old enough
        dem_diff_list = get_dem_diff_old_enough(dem_diff_list)

        dem_diff_ids = [get_grid_id_from_path(item) for item in dem_diff_list]
        print('dem_diff_ids')
        print(dem_diff_ids)

        # remove dem_diff files already assigned to other machines
        if os.path.isfile(dem_list_txt):
            io_function.delete_file_or_dir(dem_list_txt)
        dem_diff_assigned = read_dem_diff_assigned_to_other_machine(
            job_list_pre)
        assigned_ids = [
            get_grid_id_from_path(item) for item in dem_diff_assigned
        ]
        print('assigned_ids')
        print(assigned_ids)
        keep_idx = [
            idx for idx, id in enumerate(dem_diff_ids)
            if id not in assigned_ids
        ]
        dem_diff_list = [dem_diff_list[item] for item in keep_idx]

        if len(dem_diff_list) < 1:
            print(
                datetime.now(),
                'there is no DEM_diff for %s to seg, wait 10 minutes' %
                machine_name)
            time.sleep(600)  # wait 10 min
            continue

        # save some of them to txt, for "parallel_processing_curc.py"
        dem_diff_list = dem_diff_list[:max_list_count]
        save_ids = [get_grid_id_from_path(item) for item in dem_diff_list]
        print('save_ids')
        print(save_ids)

        io_function.save_list_to_txt(dem_list_txt, dem_diff_list)

        res = os.system('./run.sh %s %s' % (ext_shp, task))
        if res != 0:
            sys.exit(1)

        copy_segment_result_to_curc(save_ids)

        if b_remove_job_folder:
            os.system('rm -r seg_dem_diff_*')
            io_function.delete_file_or_dir(dem_list_txt)
Example #28
def segment_a_grey_image(img_path,
                         save_dir,
                         process_num,
                         org_raster=None,
                         b_save_patch_label=False):

    out_pre = os.path.splitext(os.path.basename(img_path))[0]
    label_path = os.path.join(save_dir, out_pre + '_label.tif')
    if os.path.isfile(label_path):
        basic.outputlogMessage('%s exist, skip segmentation' % label_path)
        return label_path

    height, width, band_num, data_type = raster_io.get_height_width_bandnum_dtype(
        img_path)
    print('input image: height, width, band_num, data_type', height, width,
          band_num, data_type)

    # if the original data is available, then calculate the attributes based on it
    if org_raster is not None:
        org_height, org_width, org_band_num, org_data_type = raster_io.get_height_width_bandnum_dtype(
            org_raster)
        if org_height != height or org_width != width:
            raise ValueError('%s and %s do not have the same size' %
                             (img_path, org_raster))

    save_labels = np.zeros((height, width), dtype=np.int32)
    # divide the image into many small patches, then process them one by one, to avoid memory issues
    image_patches = split_image.sliding_window(width,
                                               height,
                                               1024,
                                               1024,
                                               adj_overlay_x=0,
                                               adj_overlay_y=0)
    patch_count = len(image_patches)

    # for idx, patch in enumerate(image_patches):
    #     out_patch, out_labels = segment_a_patch(idx, patch, patch_count, img_path)
    #     # copy to the entire image
    #     row_s = patch[1]
    #     row_e = patch[1] + patch[3]
    #     col_s = patch[0]
    #     col_e = patch[0] + patch[2]
    #     save_labels[row_s:row_e, col_s:col_e] = out_labels

    thread_pool = Pool(process_num)
    parameters_list = [(idx, patch, patch_count, img_path, org_raster,
                        b_save_patch_label)
                       for idx, patch in enumerate(image_patches)]
    results = thread_pool.starmap(segment_a_patch, parameters_list)

    patch_label_path_list = []
    patch_label_id_range = []
    object_attributes = {}  # object id (label) and attributes (list)
    for res in results:
        patch, out_labels, nodata, attributes = res
        if isinstance(out_labels, str) and os.path.isfile(out_labels):
            # it's a label file
            patch_label_path_list.append(out_labels)
        else:
            # copy to the entire image
            row_s = patch[1]
            row_e = patch[1] + patch[3]
            col_s = patch[0]
            col_e = patch[0] + patch[2]
            current_max = np.max(save_labels)
            print('current_max', current_max)
            patch_label_id_range.append(current_max)
            save_labels[row_s:row_e, col_s:col_e] = out_labels + current_max + 1
            if attributes is not None:
                update_label_attr = {}
                for key in attributes:
                    update_label_attr[key + current_max] = attributes[key]
                # add to the attributes
                object_attributes.update(update_label_attr)

    # # apply a median filter (to remove some noise)? No: we should not use a median filter here, because these are labels, not images.
    # label_blurs = cv2.medianBlur(np.float32(save_labels), 3)  # with kernel=3, cannot accept int32
    # # print(label_blurs, label_blurs.dtype)
    # save_labels = label_blurs.astype(np.int32)

    # return a list of labels saved in current working folder.
    if b_save_patch_label:
        return patch_label_path_list

    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    # save attributes (if not empty)
    if object_attributes:
        attribute_path = os.path.join(save_dir, out_pre + '_attributes.txt')
        io_function.save_dict_to_txt_json(attribute_path, object_attributes)

    # save the label
    raster_io.save_numpy_array_to_rasterfile(save_labels, label_path,
                                             img_path)  # do not set nodata
    # save id ranges to txt
    label_id_range_txt = os.path.splitext(label_path)[0] + '_IDrange.txt'
    patch_label_id_range = [str(item) for item in patch_label_id_range]
    io_function.save_list_to_txt(label_id_range_txt, patch_label_id_range)

    return label_path
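# A toy illustration of the label-offset bookkeeping above: each patch's
# labels are shifted by the running maximum plus one, so ids stay unique
# across patches (the arrays and values here are made up):
import numpy as np

mosaic = np.zeros((2, 4), dtype=np.int32)
patch_a = np.array([[0, 1], [1, 2]], dtype=np.int32)  # labels 0..2
patch_b = np.array([[0, 0], [1, 1]], dtype=np.int32)  # labels 0..1

mosaic[:, 0:2] = patch_a + np.max(mosaic) + 1  # becomes 1..3
mosaic[:, 2:4] = patch_b + np.max(mosaic) + 1  # becomes 4..5, no id collision
print(mosaic)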
Example #29
def download_dem_tarball(dem_index_shp,
                         extent_polys,
                         save_folder,
                         pre_name,
                         reg_tif_dir=None,
                         poly_ids=None,
                         b_arcticDEM_tile=False):
    # read dem polygons and url
    dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list(
        dem_index_shp, 'fileurl', b_fix_invalid_polygon=False)

    basic.outputlogMessage('%d dem polygons in %s' %
                           (len(dem_polygons), dem_index_shp))

    dem_tar_ball_list = []
    reg_tifs_list = []
    curr_dir = os.getcwd()
    b_save_grid_id_noDEM = True
    if poly_ids is None:
        poly_ids = [idx for idx in range(len(extent_polys))]
        b_save_grid_id_noDEM = False  # if poly_ids is not the global unique id, then don't save it.

    if os.path.isfile('no_registration_strips.txt'):
        no_registration_strips = io_function.read_list_from_txt(
            'no_registration_strips.txt')
    else:
        no_registration_strips = []

    # tarballs currently being downloaded
    downloading_tarballs = []

    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage('get data for the %d th extent (%d/%d)' %
                               (idx, count, len(extent_polys)))

        save_txt_path = pre_name + '_dem_urls_poly_%d.txt' % idx
        if os.path.isfile(save_txt_path):
            urls = io_function.read_list_from_txt(save_txt_path)
            basic.outputlogMessage('read %d dem urls from %s' %
                                   (len(urls), save_txt_path))
        else:
            # get fileurl
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(
                dem_polygons, ext_poly)
            basic.outputlogMessage('find %d DEM within %d th extent' %
                                   (len(dem_poly_ids), (idx)))
            urls = [dem_urls[id] for id in dem_poly_ids]

            # save to txt
            io_function.save_list_to_txt(save_txt_path, urls)
            basic.outputlogMessage('save dem urls to %s' % save_txt_path)

        if len(urls) > 0:

            # total_size_GB = get_total_size(urls)  # internet access, parallel running may cause problem. The info is not important
            # basic.outputlogMessage('the size of files will be downloaded is %.4lf GB for the %d th extent '%(total_size_GB,(idx+1)))
            # time.sleep(5)   # wait 5 seconds

            # download them using wget one by one
            for ii, url in enumerate(urls):
                tmp = urlparse(url)

                # in the Strip DEM index, around 700 urls point directly to tif files; downloading them fails
                # e.g. /mnt/pgc/data/elev/dem/setsm/ArcticDEM/geocell/v3.0/2m_temp/n59w137/SETSM_WV03_20150518_104001000B703200_104001000C715B00_seg8_2m_v3.0_dem.tif
                if url.startswith('/mnt') and url.endswith('.tif'):
                    basic.outputlogMessage("error: not a valid url: %s" % url)
                    continue

                filename = os.path.basename(tmp.path)
                save_dem_path = os.path.join(save_folder, filename)
                if reg_tif_dir is not None:
                    tar_base = os.path.basename(filename)[:-7]
                    # file_pattern = ['*dem_reg.tif', '*reg_dem.tif'] # Arctic strip and tile (mosaic) version
                    if b_arcticDEM_tile:
                        reg_tifs = io_function.get_file_list_by_pattern(
                            reg_tif_dir, tar_base + '*reg_dem.tif')
                    else:
                        reg_tifs = io_function.get_file_list_by_pattern(
                            reg_tif_dir, tar_base + '*dem_reg.tif')
                    if len(reg_tifs) > 0:
                        basic.outputlogMessage(
                            'warning, unpacked and registered tif for %s already exists, skip downloading'
                            % filename)
                        reg_tifs_list.append(reg_tifs[0])
                        continue

                    if './' + tar_base in no_registration_strips:
                        basic.outputlogMessage(
                            'warning, %s is in no_registration_strips list, skip downloading'
                            % filename)
                        continue

                if filename in downloading_tarballs:
                    basic.outputlogMessage(
                        'warning, %s is being downloaded by other processes' %
                        filename)
                    continue

                if os.path.isfile(
                        save_dem_path) and os.path.getsize(save_dem_path) > 1:
                    basic.outputlogMessage(
                        'warning, %s already exists, skip downloading' %
                        filename)
                else:
                    # download the dem
                    basic.outputlogMessage(
                        'starting downloading %d th DEM (%d in total)' %
                        ((ii + 1), len(urls)))
                    downloading_tarballs.append(filename)

                    # os.chdir(save_folder)

                    # run_a_process_download(url)  # download

                    ##################################################
                    # download in parallel
                    basic.check_exitcode_of_process(
                        download_tasks)  # if a former job failed, then quit

                    while True:
                        job_count = basic.alive_process_count(download_tasks)
                        if job_count >= max_task_count:
                            print(
                                machine_name, datetime.now(),
                                'You are running %d or more tasks in parallel, wait '
                                % max_task_count)
                            time.sleep(60)
                            continue
                        break

                    # start the processing
                    sub_process = Process(
                        target=run_a_process_download,
                        args=(url, save_dem_path, reg_tif_dir, max_task_count,
                              b_unpack_after_downloading
                              ))  # start a process, don't wait
                    sub_process.start()
                    download_tasks.append(sub_process)

                    basic.close_remove_completed_process(download_tasks)

                    # os.chdir(curr_dir)

                dem_tar_ball_list.append(save_dem_path)

        else:
            basic.outputlogMessage(
                'Warning, cannot find DEMs within the %d th extent' % idx)
            if b_save_grid_id_noDEM:
                save_id_grid_no_dem(idx)

    # wait until all tasks complete
    while True:
        job_count = basic.alive_process_count(download_tasks)
        if job_count > 0:
            print(
                machine_name, datetime.now(),
                'wait until all tasks are completed, alive task count: %d' %
                job_count)
            time.sleep(60)
        else:
            break

    return dem_tar_ball_list, reg_tifs_list
Example #30
def get_grid_20(extent_shp_or_id_txt, grid_polys, ids):
    '''
    get grid polygons and ids based on an input extent (polygon in a shapefile) or ids (txt file)
    if "file_name_base + '_grid_ids.txt'" exists, read the ids from that file directly.
    :param extent_shp_or_id_txt:
    :param grid_polys:
    :param ids:
    :return:
    '''

    io_function.is_file_exist(extent_shp_or_id_txt)

    if extent_shp_or_id_txt.endswith('.txt'):
        grid_ids = io_function.read_list_from_txt(extent_shp_or_id_txt)
        grid_ids = [int(item) for item in grid_ids]
    else:
        shp_corresponding_grid_ids_txt = get_corresponding_grid_ids_txt(extent_shp_or_id_txt)
        if os.path.isfile(shp_corresponding_grid_ids_txt):
            print('corresponding grid ids txt file for %s exists, read grid ids from txt' % extent_shp_or_id_txt)
            grid_ids = [int(item) for item in io_function.read_list_from_txt(shp_corresponding_grid_ids_txt)]
            basic.outputlogMessage('read %d grids within the extents (%s)'
                                   % (len(grid_ids), os.path.basename(extent_shp_or_id_txt)))
        else:
            # extent polygons and projection (proj4)
            extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp_or_id_txt)
            if extent_shp_prj == '':
                raise ValueError('get proj4 of %s failed' % extent_shp_or_id_txt)
            grid_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)
            if grid_shp_prj == '':
                raise ValueError('get proj4 of %s failed' % grid_20_shp)

            if extent_shp_prj != grid_shp_prj:
                basic.outputlogMessage('%s and %s do not have the same projection, will reproject %s'
                                       % (extent_shp_or_id_txt, grid_20_shp, os.path.basename(extent_shp_or_id_txt)))
                epsg = map_projection.get_raster_or_vector_srs_info_epsg(grid_20_shp)
                # print(epsg)
                # extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp,dem_shp_prj.strip())
                extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp_or_id_txt, epsg)
            else:
                extent_polys = vector_gpd.read_polygons_gpd(extent_shp_or_id_txt)

            ext_poly_count = len(extent_polys)
            if ext_poly_count < 1:
                raise ValueError('No polygons in %s' % extent_shp_or_id_txt)
            grid_index = []
            # if there are many polygons, this will take time
            for idx, ext_poly in enumerate(extent_polys):
                print(timeTools.get_now_time_str(), 'get grids for extent idx', idx,
                      'total polygons:', ext_poly_count)
                index = vector_gpd.get_poly_index_within_extent(grid_polys, ext_poly)
                grid_index.extend(index)
            grid_index = list(set(grid_index))  # remove duplicated ids
            basic.outputlogMessage('find %d grids within the extents (%s)' %
                                   (len(grid_index), os.path.basename(extent_shp_or_id_txt)))

            grid_ids = [ids[idx] for idx in grid_index]
            grid_ids_str = [str(item) for item in grid_ids]
            io_function.save_list_to_txt(shp_corresponding_grid_ids_txt, grid_ids_str)

    id_index = [ids.index(id) for id in grid_ids]
    selected_grid_polys = [grid_polys[idx] for idx in id_index]

    return selected_grid_polys, grid_ids
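# Note: ids.index(id) above is a linear search per grid id; building an
# id-to-index map once makes the final selection linear overall (a sketch;
# select_by_ids_sketch is hypothetical):
def select_by_ids_sketch(grid_polys, ids, grid_ids):
    id_to_index = {gid: idx for idx, gid in enumerate(ids)}  # one-pass lookup table
    return [grid_polys[id_to_index[g]] for g in grid_ids]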