def copy_ini_files(ini_dir, work_dir, para_file, area_ini_list,backbone):
    """Copy the main parameter file, the backbone file, and all area ini
    files from ini_dir into work_dir, overwriting existing copies."""
    import basic_src.io_function as io_function
    files_to_copy = [para_file, backbone, *area_ini_list]
    for file_name in files_to_copy:
        src_path = os.path.join(ini_dir, file_name)
        dst_path = os.path.join(work_dir, file_name)
        io_function.copy_file_to_dst(src_path, dst_path, overwrite=True)
def mosaic_dem_list_gdal_merge(key, dem_list, save_tif_dir,save_source):
    """Create a mosaic named `<key>.tif` in save_tif_dir from dem_list.

    In areas of overlap, the last image is copied over earlier ones
    (gdal_merge behavior). Returns the mosaic path; exits the process if
    mosaicking fails.
    """
    mosaic_name = key + '.tif'
    save_mosaic = os.path.join(save_tif_dir, mosaic_name)
    # skip work if the mosaic already exists (possibly in a sub-folder)
    existing = io_function.is_file_exist_subfolder(save_tif_dir, mosaic_name)
    if existing is not False:
        basic.outputlogMessage('warning, mosaic file: %s exist, skip' % save_mosaic)
        return save_mosaic

    # record which DEMs went into this mosaic
    if save_source:
        source_txt = os.path.join(save_tif_dir, key + '_src.txt')
        io_function.save_list_to_txt(source_txt, dem_list)

    # a single non-VRT input needs no mosaicking: just copy it
    if len(dem_list) == 1 and raster_io.get_driver_format(dem_list[0]) != 'VRT':
        io_function.copy_file_to_dst(dem_list[0], save_mosaic)
        return save_mosaic

    nodata = raster_io.get_nodata(dem_list[0])
    # create mosaic; can handle a single input file as well, but is slow
    result = RSImageProcess.mosaics_images(dem_list, save_mosaic, nodata=nodata,
                                           compress='lzw', tiled='yes',
                                           bigtiff='if_safer')
    if result is False:
        sys.exit(1)
    return save_mosaic
def remove_nonclass_polygon(self, shape_file, out_shp, class_field_name):
    """
    remove polygons that are not belong to targeted class, it means the value of class_field_name is 0
    :param shape_file: input shapefile containing all the polygons
    :param out_shp: output shapefile
    :param class_field_name: the name of class field, such as svmclass, treeclass
    :return: True if successful, False Otherwise
    """
    if io_function.is_file_exist(shape_file) is False:
        return False
    try:
        org_obj = shapefile.Reader(shape_file)
    except:
        # NOTE(review): bare except swallows everything; str(IOError) logs the
        # class name, not the actual error — consider logging the exception itself
        basic.outputlogMessage(str(IOError))
        return False

    # Create a new shapefile in memory (legacy pyshp 1.x Writer API)
    w = shapefile.Writer()
    w.shapeType = org_obj.shapeType

    org_records = org_obj.records()
    if (len(org_records) < 1):
        basic.outputlogMessage('error, no record in shape file ')
        return False

    # Copy over the geometry without any changes
    w.fields = list(org_obj.fields)
    # locate the class field column; False means the field was not found
    field_index = self.__find_field_index(w.fields, class_field_name)
    if field_index is False:
        return False
    shapes_list = org_obj.shapes()
    org_shape_count = len(shapes_list)
    i = 0
    removed_count = 0
    for i in range(0, len(shapes_list)):
        rec = org_records[i]
        if rec[field_index] == 0:
            # remove the record which class is 0, 0 means non-gully
            removed_count = removed_count + 1
            continue
        # keep this feature: copy both its geometry and its record
        w._shapes.append(shapes_list[i])
        rec = org_records[i]
        w.records.append(rec)
    basic.outputlogMessage('Remove non-class polygon, total count: %d' % removed_count)
    # w._shapes.extend(org_obj.shapes())
    if removed_count == org_shape_count:
        basic.outputlogMessage('error: already remove all the polygons')
        return False
    # copy prj file so the output keeps the input's projection
    org_prj = os.path.splitext(shape_file)[0] + ".prj"
    out_prj = os.path.splitext(out_shp)[0] + ".prj"
    io_function.copy_file_to_dst(org_prj, out_prj, overwrite=True)
    w.save(out_shp)
    return True
def copy_ini_files(curr_dir_before_ray, work_dir):
    """Copy 'main_para.ini', the backbone files, and the area ini files from
    curr_dir_before_ray into the current working directory.

    NOTE(review): `area_ini_list` and `backbones` are module-level names
    defined elsewhere; this function appends to `area_ini_list` in place,
    so calling it twice duplicates entries — confirm it is only called once.
    NOTE(review): `work_dir` is unused; files are copied to the current
    directory (relative dst path), presumably after chdir to work_dir.
    """
    import basic_src.io_function as io_function
    area_ini_list.append('main_para.ini')
    area_ini_list.extend(backbones)
    for ini in area_ini_list:
        io_function.copy_file_to_dst(os.path.join(curr_dir_before_ray, ini), ini, overwrite=True)
def create_new_region_defined_parafile(template_para_file, img_dir, area_remark=None):
    '''
    create a new region defined para file. Only defined the new images (did not change others)
    :param template_para_file: path of the template region-defined para file
    :param img_dir: directory containing the input tif images; an 8-digit date
        is extracted from its basename when present
    :param area_remark: optional remark appended to the new file name and
        written to the 'area_remark' parameter
    :return: path (basename, in the current folder) of the new para file
    :raises IOError: if the new para file already exists
    :raises ValueError: if img_dir contains no tif files
    '''
    io_function.is_file_exist(template_para_file)

    dir_base = os.path.basename(img_dir)
    date_strs = re.findall(r'\d{8}', dir_base)
    if len(date_strs) == 1:
        date = date_strs[0]
    else:
        date = 'unknown'

    # bug fix: the original unconditionally built `date + '_' + area_remark`,
    # which raises TypeError when area_remark is None (its documented default)
    if area_remark is not None:
        tail = date + '_' + area_remark
    else:
        tail = date
    new_para_file = io_function.get_name_by_adding_tail(template_para_file, tail)
    new_para_file = os.path.basename(new_para_file)  # save to current folder

    if os.path.isfile(new_para_file):
        raise IOError('%s already exists, please check or remove first' % new_para_file)

    # copy the file
    io_function.copy_file_to_dst(template_para_file, new_para_file)

    if area_remark is not None:
        modify_parameter(new_para_file, 'area_remark', area_remark)
    if date != 'unknown':
        modify_parameter(new_para_file, 'area_time', date)

    modify_parameter(new_para_file, 'input_image_dir', img_dir)
    modify_parameter(new_para_file, 'inf_image_dir', img_dir)

    tif_list = io_function.get_file_list_by_ext('.tif', img_dir, bsub_folder=False)
    if len(tif_list) < 1:
        raise ValueError('No tif in %s' % img_dir)
    if len(tif_list) == 1:
        # exactly one image: reference it by name
        modify_parameter(new_para_file, 'input_image_or_pattern', os.path.basename(tif_list[0]))
        modify_parameter(new_para_file, 'inf_image_or_pattern', os.path.basename(tif_list[0]))
    else:
        # multiple images: use a glob pattern
        modify_parameter(new_para_file, 'input_image_or_pattern', '*.tif')
        modify_parameter(new_para_file, 'inf_image_or_pattern', '*.tif')

    print("modified and saved new parameter file: %s " % new_para_file)
    return new_para_file
def original_sub_images_labels_list_before_gan():
    """Back up the original sub-image/label list (before GAN augmentation)
    and return the three related file names:
    (noGAN backup txt, current list txt, area-ini mapping txt)."""
    current_list = 'sub_images_labels_list.txt'
    backup_list = 'sub_images_labels_list_noGAN.txt'
    area_ini_map = 'area_ini_sub_images_labels.txt'
    # only create the backup once; never overwrite an existing one
    if not os.path.isfile(backup_list):
        io_function.copy_file_to_dst(current_list, backup_list, overwrite=False)
    return backup_list, current_list, area_ini_map
def copy_subImages_labels_directly(subImage_dir, subLabel_dir, area_ini): input_image_dir = parameters.get_directory_None_if_absence( area_ini, 'input_image_dir') # it is ok consider a file name as pattern and pass it the following functions to get file list input_image_or_pattern = parameters.get_string_parameters( area_ini, 'input_image_or_pattern') # label raster folder label_raster_dir = parameters.get_directory_None_if_absence( area_ini, 'label_raster_dir') sub_images_list = [] label_path_list = [] if os.path.isdir(subImage_dir) is False: io_function.mkdir(subImage_dir) if os.path.isdir(subLabel_dir) is False: io_function.mkdir(subLabel_dir) sub_images = io_function.get_file_list_by_pattern(input_image_dir, input_image_or_pattern) for sub_img in sub_images: # find the corresponding label raster label_name = io_function.get_name_by_adding_tail( os.path.basename(sub_img), 'label') label_path = os.path.join(label_raster_dir, label_name) if os.path.isfile(label_path): sub_images_list.append(sub_img) label_path_list.append(label_path) else: print('Warning, cannot find label for %s in %s' % (sub_img, label_raster_dir)) # copy sub-images, adding to txt files with open('sub_images_labels_list.txt', 'a') as f_obj: for tif_path, label_file in zip(sub_images_list, label_path_list): if label_file is None: continue dst_subImg = os.path.join(subImage_dir, os.path.basename(tif_path)) # copy sub-images io_function.copy_file_to_dst(tif_path, dst_subImg, overwrite=True) dst_label_file = os.path.join(subLabel_dir, os.path.basename(label_file)) io_function.copy_file_to_dst(label_file, dst_label_file, overwrite=True) sub_image_label_str = dst_subImg + ":" + dst_label_file + '\n' f_obj.writelines(sub_image_label_str)
def make_note_all_task_done(extent_shp, reomte_node):
    """Mark all grid tasks of extent_shp as done: ensure the grid-id log files
    exist in grid_ids_txt_dir (a module-level path), write a 'Done' marker
    file, and push that marker to the remote machine.

    :param extent_shp: extent shapefile whose tasks are complete
    :param reomte_node: remote host passed to scp_communicate
        (NOTE(review): parameter name has a typo, "reomte" — kept for
        interface compatibility)
    """
    if os.path.isdir(grid_ids_txt_dir) is False:
        io_function.mkdir(grid_ids_txt_dir)

    shp_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(extent_shp)
    # shp_grid_id_txt should be in the current folder
    if os.path.isfile(log_grid_ids_txt) is False:
        io_function.copy_file_to_dst(shp_grid_id_txt, log_grid_ids_txt)

    # the presence of this file signals completion to other scripts
    if os.path.isfile(log_grid_ids_txt_done) is False:
        io_function.save_list_to_txt(log_grid_ids_txt_done, ['Done'])

    # copy the curc (hard-coded remote path on the CURC cluster)
    r_grid_ids_txt_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/grid_ids_txt'
    scp_communicate.copy_file_folder_to_remote_machine(reomte_node, r_grid_ids_txt_dir, log_grid_ids_txt_done)
def check_one_extent(extent_shp): print('start to check %s' % extent_shp) # local_grid_id_txt is in the current dir # log_grid_ids_txt, log_grid_ids_txt_done is in grid_ids_txt_dir local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files( extent_shp) if os.path.isfile(local_grid_id_txt) is False and os.path.isfile( log_grid_ids_txt): io_function.copy_file_to_dst(log_grid_ids_txt, local_grid_id_txt) if os.path.isfile(local_grid_id_txt) is False: print('the _grid_ids.txt for %s does not exist, maybe it has started' % extent_shp) return False # check if it has been complete if os.path.isfile(log_grid_ids_txt_done): basic.outputlogMessage('Tasks for extent %s have been completed' % extent_shp) return True grid_ids_to_process_txt = io_function.get_name_no_ext( extent_shp) + '_' + 'grid_ids_to_process.txt' # read from txt file directly grid_ids = [ int(item) for item in io_function.read_list_from_txt(local_grid_id_txt) ] update_complete_grid_list(grid_ids, task_list) # check complete files, to see if it's done # remove grids that has been complete or ignored ignore_ids = get_complete_ignore_grid_ids() num_grid_ids = save_grid_ids_need_to_process( grid_ids, ignore_ids=ignore_ids, save_path=grid_ids_to_process_txt) if num_grid_ids < 1: print(datetime.now(), ' %s is marked as completed' % extent_shp) make_note_all_task_done(extent_shp, curc_node) else: print( datetime.now(), ' %s has not completed, %d grids to process, total: %d' % (extent_shp, num_grid_ids, len(grid_ids))) return True
def move_align_results(ref_dem, dem_tif, save_dir):
    """Organize dem_align.py outputs for dem_tif into sub-folders of save_dir.

    Moves the filtered aligned DEM into save_dir/dem_coreg (with a '_coreg'
    tail), copies the reference DEM there once, moves the filtered elevation
    difference into save_dir/dem_diff_from_demcoreg, and the diagnostic PNGs
    into save_dir/demcoreg_png_plot.

    :raises ValueError: when dem_align.py produced fewer than 9 output files.
    :return: True
    """
    coreg_save_dir = os.path.join(save_dir, 'dem_coreg')
    if os.path.isdir(coreg_save_dir) is False:
        io_function.mkdir(coreg_save_dir)

    align_outputs = check_align_folder(dem_tif)
    if len(align_outputs) < 9:
        raise ValueError('the output of dem_align.py is less than 9 files')

    dem_align = os.path.join(coreg_save_dir,
                             os.path.basename(io_function.get_name_by_adding_tail(dem_tif, 'coreg')))
    # align DEM and a filt version, which one should I use? what filter they apply?
    # visually checked one result (Banks east): at the same location, the align DEM and
    # the filt one have exact values, but the filt version has more nodata. Use the filt version.
    # the nodata pixels usually are water pixels, but also some inside the thaw slumps
    align_filt = [out for out in align_outputs if out.endswith('align_filt.tif')][0]
    io_function.move_file_to_dst(align_filt, dem_align, overwrite=True)

    # copy reference dem if necessary
    ref_dem_copy = os.path.join(coreg_save_dir, os.path.basename(ref_dem))
    if os.path.isfile(ref_dem_copy) is False:
        io_function.copy_file_to_dst(ref_dem, ref_dem_copy)

    # move the elevation difference
    ele_diff_folder = os.path.join(save_dir, 'dem_diff_from_demcoreg')
    if os.path.isdir(ele_diff_folder) is False:
        io_function.mkdir(ele_diff_folder)
    dem_diff_filt = [out for out in align_outputs if out.endswith('align_diff_filt.tif')][0]
    io_function.movefiletodir(dem_diff_filt, ele_diff_folder, overwrite=True)

    coreg_png_plot_folder = os.path.join(save_dir, 'demcoreg_png_plot')
    # bug fix: the condition was inverted ("if os.path.isdir(...)" without "is False"),
    # so the folder was only "created" when it already existed and never when missing,
    # making the PNG moves below fail on a fresh save_dir.
    if os.path.isdir(coreg_png_plot_folder) is False:
        io_function.mkdir(coreg_png_plot_folder)
    coreg_pngs = [out for out in align_outputs if out.endswith('.png')]
    for png in coreg_pngs:
        io_function.movefiletodir(png, coreg_png_plot_folder, overwrite=True)

    return True
def save_polygons_to_shp(polygon_list, base_shp, folder):
    """Save each polygon in polygon_list to its own single-feature shapefile
    in `folder`, named '<base_shp basename>_<index>.shp', copying the .prj
    from base_shp.

    :return: list of shapefile paths (existing files are skipped but still
        listed), or False on error.
    """
    if len(polygon_list) < 1:
        print('Error, there is no polygon in the list')
        return False
    try:
        shp_obj = shapefile.Reader(base_shp)
    except IOError:
        print("Read file: %s failed: " % base_shp + str(IOError))
        return False

    save_shp_list = []
    save_id = 0
    for polygon in polygon_list:
        # legacy pyshp 1.x Writer API: build one writer per output file
        w = shapefile.Writer()
        w.shapeType = shp_obj.shapeType
        filename = os.path.join(folder,
                                os.path.splitext(os.path.basename(base_shp))[0] + '_' + str(save_id) + '.shp')
        if os.path.isfile(filename) is False:
            w.field('id')
            w._shapes.append(polygon)
            w.record(save_id)
            # copy prj file so the output keeps base_shp's projection
            org_prj = os.path.splitext(base_shp)[0] + ".prj"
            out_prj = os.path.splitext(filename)[0] + ".prj"
            io_function.copy_file_to_dst(org_prj, out_prj, overwrite=True)
            # save to file
            w.save(filename)
        else:
            print('warning: %s already exist, skip' % filename)
        # advance the id and record the path whether written or skipped
        save_id += 1
        save_shp_list.append(filename)

    return save_shp_list
def prepare_data_for_evaluation(para_file):
    """Prepare image patches and TF-records for model evaluation.

    Extracts sub-images from training polygons, splits them into patches,
    routes every patch into the validation sample list, and builds the
    TF1.x records.
    """
    import workflow.whole_procedure as whole_procedure

    # extract sub-images from the training polygons, then cut them into patches
    whole_procedure.extract_sub_images_using_training_polygons(para_file)
    whole_procedure.split_sub_images(para_file)
    # whole_procedure.training_img_augment(para_file)
    # whole_procedure.split_train_val(para_file)

    # use the full patch list (trainval.txt) as the evaluation sample list
    all_patches_txt = os.path.join('list', 'trainval.txt')
    val_list_name = parameters.get_string_parameters(para_file, 'validation_sample_list_txt')
    val_list_path = os.path.join('list', val_list_name)
    io_function.copy_file_to_dst(all_patches_txt, val_list_path, overwrite=True)

    # convert the image patches to tf-records
    whole_procedure.build_TFrecord_tf1x(para_file)
def main(options, args):
    """Entry point for k-fold cross-validation training.

    :param options: parsed options; options.para_file is the parameter file
    :param args: positional args — args[0] is k (fold count), args[1] is the
        test number used to tag output folders
    """
    time_str = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    print2file(log, time_str)
    para_file = options.para_file
    k_value = int(args[0])
    test_num = int(args[1])
    print2file(log, 'kvalue : %d' % k_value)
    print2file(log, 'test_num : %d' % test_num)

    # the trained-model folder name is shared with train_kfold_cross_val via a global
    global trained_model_dir
    trained_model_dir = parameters.get_string_parameters(para_file, 'expr_name')

    # get the path of multi training polygons
    multi_training_files = parameters.get_string_parameters_None_if_absence(para_file, 'multi_training_files')
    if multi_training_files is None:
        raise ValueError('multi_training_files is not set in the %s' % para_file)
    io_function.is_file_exist(multi_training_files)

    # backup the original training file which contains the full set of polyogns
    training_files_allPolygons = io_function.get_name_by_adding_tail(multi_training_files, 'allPolygons')
    if os.path.isfile(training_files_allPolygons) is False:
        io_function.copy_file_to_dst(multi_training_files, training_files_allPolygons)
    else:
        basic.outputlogMessage('The full set polygons already exist')  #%multi_training_files

    # training on using the k subset
    train_kfold_cross_val(training_files_allPolygons, multi_training_files, k_value, test_num)
def mosaic_dem_list(key, dem_list, save_tif_dir,resample_method,save_source, o_format, thread_num=1):
    """Create a mosaic `<key>.tif` in save_tif_dir from dem_list using
    gdalwarp (RSImageProcess.mosaic_crop_images_gdalwarp).

    :param key: output name stem
    :param dem_list: DEM tif paths to mosaic
    :param save_tif_dir: output directory
    :param resample_method: gdalwarp resampling method
    :param save_source: if True, write the input list to '<key>_src.txt'
    :param o_format: output driver (e.g. 'GTiff' or 'VRT')
    :param thread_num: number of gdalwarp threads
    :return: path of the mosaic; exits the process on mosaicking failure
    """
    # print('\n\n os.fork \n\n', os.fork())
    # if os.fork()==0:
    # proc_id = multiprocessing.current_process().pid
    # basic.setlogfile('log_file_pid_%d.txt'%proc_id)

    save_mosaic = os.path.join(save_tif_dir, key + '.tif')
    # check file existence
    # if os.path.isfile(save_mosaic):
    b_save_mosaic = io_function.is_file_exist_subfolder(save_tif_dir, key + '.tif')
    if b_save_mosaic is not False:
        basic.outputlogMessage('warning, mosaic file: %s exist, skip' % save_mosaic)
        return save_mosaic
        # mosaic_list.append(b_save_mosaic)
        # continue

    # save the source file for producing the mosaic
    if save_source:
        save_mosaic_source_txt = os.path.join(save_tif_dir, key + '_src.txt')
        io_function.save_list_to_txt(save_mosaic_source_txt, dem_list)

    # if only one dem, then copy it if it's not VRT format
    if len(dem_list) == 1:
        if raster_io.get_driver_format(dem_list[0]) != 'VRT':
            io_function.copy_file_to_dst(dem_list[0], save_mosaic)
            return save_mosaic

    # create mosaic, can handle only input one file, but is slow
    result = RSImageProcess.mosaic_crop_images_gdalwarp(dem_list, save_mosaic,
                                                        resampling_method=resample_method,
                                                        o_format=o_format,
                                                        compress='lzw', tiled='yes',
                                                        bigtiff='if_safer', thread_num=thread_num)
    if result is False:
        sys.exit(1)
        # return False
    return save_mosaic
def organize_files(sub_img_dirs, save_dir):
    """Gather sub-image products from sub_img_dirs into a flat dataset layout
    under save_dir: 'images/' (png + optional .aux.xml), 'imageBound/'
    (bound geojson), 'objectPolygons/' (per-object geojson), plus an
    'imageList.txt' of the renamed image stems ('imgNNNNNN_<basename>').
    """
    # bug fix: the original tested os.path.isfile(save_dir), which is False for
    # both a missing path AND an existing directory, so mkdir was called even
    # when the directory already existed; test for the directory instead.
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    # get all png files
    png_list = []
    for img_dir in sub_img_dirs:
        pngs = io_function.get_file_list_by_pattern(img_dir, '*.png')
        png_list.extend(pngs)

    image_name_list = []
    images_dir = os.path.join(save_dir, 'images')
    imageBound_dir = os.path.join(save_dir, 'imageBound')
    objectPolygons_dir = os.path.join(save_dir, 'objectPolygons')
    io_function.mkdir(images_dir)
    io_function.mkdir(imageBound_dir)
    io_function.mkdir(objectPolygons_dir)

    for idx, png in enumerate(png_list):
        basename = io_function.get_name_no_ext(png)
        # give each image a unique, sortable prefix: img000001_, img000002_, ...
        new_name = 'img' + str(idx + 1).zfill(6) + '_' + basename
        image_name_list.append(new_name)
        io_function.copy_file_to_dst(png, os.path.join(images_dir, new_name + '.png'))

        # copy the georeferencing sidecar if present
        png_xml = png + '.aux.xml'
        if os.path.isfile(png_xml):
            io_function.copy_file_to_dst(png_xml, os.path.join(images_dir, new_name + '.png.aux.xml'))

        bound_path = png.replace('.png', '_bound.geojson')
        io_function.copy_file_to_dst(bound_path, os.path.join(imageBound_dir, new_name + '_bound.geojson'))

        # the object geojson is keyed by the numeric id embedded in the basename
        # (e.g. '..._123' -> 'id_123.geojson'); assumes such an id exists — TODO confirm
        digit_str = re.findall(r'_\d+', basename)
        id_str = digit_str[0][1:]
        object_path = os.path.join(os.path.dirname(png), 'id_%s.geojson' % id_str)
        io_function.copy_file_to_dst(object_path, os.path.join(objectPolygons_dir, new_name + '.geojson'))

    txt_path = os.path.join(save_dir, 'imageList.txt')
    io_function.save_list_to_txt(txt_path, image_name_list)
def produce_corresponding_grid_ids_txt(extent_shp, local_grid_id_txt, log_grid_ids_txt):
    """Ensure both the local and the log-dir copies of the grid-id txt for
    extent_shp exist, generating the grid list from grid_20_shp when needed.

    :return: True
    """
    # if it in the logdir, not the current dir, then copy it
    if os.path.isfile(log_grid_ids_txt) and os.path.isfile(local_grid_id_txt) is False:
        io_function.copy_file_to_dst(log_grid_ids_txt, local_grid_id_txt, overwrite=False)
        return True

    # if not in the local dir, then generate it
    if os.path.isfile(local_grid_id_txt) is False:
        # read grids and ids (grid_20_shp is a module-level shapefile path — TODO confirm)
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(grid_20_shp, 'id')
        print('time cost of read polygons and attributes', time.time() - time0)

        # this will create local_grid_id_txt
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

        # modify local_grid_id_txt by excluding grid_id already in adjacent extent
        other_grid_ids = read_grid_ids_from_other_extent()
        grid_ids = [id for id in grid_ids if id not in other_grid_ids]

        # over write local_grid_id_txt file
        grid_ids_str = [str(item) for item in grid_ids]
        # save a copy of the pre-exclusion list with a 'noRMadj' tail
        io_function.copy_file_to_dst(local_grid_id_txt,
                                     io_function.get_name_by_adding_tail(local_grid_id_txt, 'noRMadj'))
        io_function.save_list_to_txt(local_grid_id_txt, grid_ids_str)

    # copy to log dir
    io_function.copy_file_to_dst(local_grid_id_txt, log_grid_ids_txt)
    return True
# aspect_line_imgAug16_tp=os.path.join(out_dir,'identified_ThawSlumps_MaiinLines_prj_TP.shp') aspect_line_imgAug22_tp = os.path.join( out_dir, 'identified_ThawSlumps_MaiinLines_utm_TP.shp') # output='aspect_ground_truth_winrose.jpg' # draw_one_attribute_windrose(ground_truth,'asp_mean','',output ,color='grey',hatch="") # io_function.copy_file_to_dst(output, os.path.join(out_dir,output), overwrite=True) # draw wind rose of azimuth from manually draw lines # output="aspectLine_ground_truth_winrose.jpg" # draw_one_attribute_windrose(aspect_line, "aspectLine", "Mean Aspect ($^\circ$)", output,color='black') # ,hatch='/' # io_function.copy_file_to_dst(output, os.path.join(out_dir,output), overwrite=True) ####### use mapping polygons #### # output="aspectLine_imgAug16_tp_winrose.jpg" # draw_one_attribute_windrose(aspect_line_imgAug16_tp, "aspectLine", "Mean Aspect ($^\circ$)", output,color='black') # ,hatch='/' output = "aspectLine_imgAug22_tp_winrose.jpg" draw_one_attribute_windrose(aspect_line_imgAug22_tp, "aspectLine", "Mean Aspect ($^\circ$)", output, color='black') # ,hatch='/' io_function.copy_file_to_dst(output, os.path.join(out_dir, output), overwrite=True) ####### use mapping polygons #### # # # clear # os.system('rm processLog.txt') os.system('rm *.jpg')
# --- script section: split each sub-image/label pair into training patches and
# --- write the patch-name lists. NOTE(review): relies on image_path, out_dir,
# --- patch/overlay sizes, split_to_patches, and io_function defined earlier.
split_image.split_image(image_path, out_dir, patch_width, patch_height, overlay_x, overlay_y, out_format, pre_name=file_pre_name)

# each line of the list file is "sub_image_path:sub_label_path"
with open('sub_images_labels_list.txt') as txt_obj:
    line_list = [name.strip() for name in txt_obj.readlines()]

for line in line_list:
    sub_image, sub_label = line.split(':')

    # split sub image
    split_to_patches(sub_image, 'split_images', patch_w, patch_h, overlay, overlay, split_image_format)

    # split sub label (change the file name to be the same as sub_image name)
    pre_name = os.path.splitext(os.path.basename(sub_image))[0]
    split_to_patches(sub_label, 'split_labels', patch_w, patch_h, overlay, overlay, split_image_format, file_pre_name=pre_name)

# output trainval.txt and val.txt file (val.txt is a full copy of trainval.txt)
files_list = io_function.get_file_list_by_ext(split_image_format, 'split_images', bsub_folder=False)
io_function.mkdir('list')
trainval = os.path.join('list', 'trainval.txt')
val = os.path.join('list', 'val.txt')
with open(trainval, 'w') as w_obj:
    for file_name in files_list:
        w_obj.writelines(os.path.splitext(os.path.basename(file_name))[0] + '\n')
io_function.copy_file_to_dst(trainval, val, overwrite=True)
def split_sub_images(para_file):
    """Split sub-images and sub-labels into training patches using a process
    pool, then write list/trainval.txt and list/val.txt.

    :param para_file: parameter file providing patch sizes, overlays, format,
        input dirs, and process count
    :raises IOError: if para_file, the training image dir, or
        'sub_images_labels_list.txt' is missing
    """
    print("split sub-images and sub-labels")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s not exists in current folder: %s'%(para_file, os.getcwd()))

    SECONDS = time.time()

    # start from clean patch folders
    if os.path.isdir('split_images'):
        io_function.delete_file_or_dir('split_images')
    if os.path.isdir('split_labels'):
        io_function.delete_file_or_dir('split_labels')
    io_function.mkdir('split_images')

    ### split the training image to many small patch (480*480)
    patch_w=parameters.get_string_parameters(para_file,'train_patch_width')
    patch_h=parameters.get_string_parameters(para_file,'train_patch_height')
    overlay_x=parameters.get_string_parameters(para_file,'train_pixel_overlay_x')
    overlay_y=parameters.get_string_parameters(para_file,'train_pixel_overlay_y')
    split_image_format=parameters.get_string_parameters(para_file,'split_image_format')
    trainImg_dir=parameters.get_string_parameters(para_file,'input_train_dir')
    labelImg_dir=parameters.get_string_parameters(para_file,'input_label_dir')
    proc_num = parameters.get_digit_parameters(para_file,'process_num','int')

    if os.path.isdir(trainImg_dir) is False:
        raise IOError('%s not in the current folder, please get subImages first'%trainImg_dir)
    if os.path.isdir(labelImg_dir) is False:
        print('warning, %s not in the current folder'%labelImg_dir)
    else:
        io_function.mkdir('split_labels')

    sub_img_label_txt = 'sub_images_labels_list.txt'
    if os.path.isfile(sub_img_label_txt) is False:
        raise IOError('%s not in the current folder, please get subImages first' % sub_img_label_txt)

    with open(sub_img_label_txt) as txt_obj:
        line_list = [name.strip() for name in txt_obj.readlines()]

    # sequential version, replaced by the process pool below:
    # for line in line_list:
    #     sub_image, sub_label = line.split(':')
    #
    #     # split sub image
    #     split_to_patches(sub_image, 'split_images', patch_w, patch_h, overlay, overlay, split_image_format)
    #
    #     # split sub label (change the file name to be the same as sub_image name)
    #     pre_name = os.path.splitext(os.path.basename(sub_image))[0]
    #     split_to_patches(sub_label, 'split_labels', patch_w, patch_h, overlay, overlay, split_image_format, file_pre_name=pre_name)

    parameters_list = [(line, patch_w, patch_h, overlay_x, overlay_y, split_image_format) for line in line_list]
    # NOTE(review): the pool is never closed/joined ("theadPool" is also a typo);
    # consider "with Pool(proc_num) as pool:" — confirm before changing
    theadPool = Pool(proc_num)  # multi processes
    results = theadPool.starmap(split_a_pair_sub_image_label, parameters_list)  # need python3

    # output trainval.txt and val.txt file (val.txt is a full copy of trainval.txt)
    files_list = io_function.get_file_list_by_ext(split_image_format, 'split_images',bsub_folder=False)
    io_function.mkdir('list')
    trainval = os.path.join('list','trainval.txt')
    val = os.path.join('list','val.txt')
    with open(trainval,'w') as w_obj:
        for file_name in files_list:
            w_obj.writelines(os.path.splitext(os.path.basename(file_name))[0] + '\n')
    io_function.copy_file_to_dst(trainval,val,overwrite=True)

    split_train_val.get_image_with_height_list(trainval, split_image_format, info_type='(no data augmentation)')

    duration= time.time() - SECONDS
    os.system('echo "$(date): time cost of splitting sub images and labels: %.2f seconds">>time_cost.txt'%duration)
def add_fields_shape(self, ori_shp, new_shp, output_shp):
    """
    add fields from another shapefile(merge the fields of two shape files),
    the two shape files should have the same number of features
    :param ori_shp: the path of original shape file which will be added new field
    :param new_shp: the shape file contains new fields
    :param output_shp: saved shape file
    :return: True if successful, False otherwise
    """
    # Read in our existing shapefile
    if io_function.is_file_exist(ori_shp) is False or io_function.is_file_exist(new_shp) is False:
        return False
    try:
        org_obj = shapefile.Reader(ori_shp)
        new_obj = shapefile.Reader(new_shp)
    except IOError:
        basic.outputlogMessage(str(IOError))
        return False

    # the two inputs must match feature-for-feature
    if len(org_obj.shapes()) != len(new_obj.shapes()):
        basic.outputlogMessage("error: the input two shape file do not have the same number of features")
        return False
    if org_obj.shapeType != new_obj.shapeType:
        basic.outputlogMessage("error: the input two shape file have different shapeType")
        return False

    # Create a new shapefile in memory (legacy pyshp 1.x Writer API)
    w = shapefile.Writer()
    w.shapeType = org_obj.shapeType

    # Copy over the existing fields, then append the new ones
    # (the deletion-flag field is a tuple and must be skipped)
    w.fields = list(org_obj.fields)
    for t_field in list(new_obj.fields):
        if isinstance(t_field, tuple):
            continue
        w.fields.append(t_field)

    # Add our new field using the pyshp API
    # w.field("KINSELLA", "C", "40")
    # # We'll create a counter in this example
    # # to give us sample data to add to the records
    # # so we know the field is working correctly.
    # i = 1
    # Loop through each record, add a column. We'll
    # insert our sample data but you could also just
    # insert a blank string or NULL DATA number
    # as a place holder
    org_records = org_obj.records()
    new_records = new_obj.records()
    for i in range(0, len(org_records)):
        rec = org_records[i]
        # append every value of the i-th new record onto the i-th original record
        for value in new_records[i]:
            rec.append(value)
        # Add the modified record to the new shapefile
        w.records.append(rec)

    # Copy over the geometry without any changes
    w._shapes.extend(org_obj.shapes())

    # copy prj file so the output keeps the input's projection
    org_prj = os.path.splitext(ori_shp)[0] + ".prj"
    out_prj = os.path.splitext(output_shp)[0] + ".prj"
    io_function.copy_file_to_dst(org_prj, out_prj, overwrite=True)

    # Save as a new shapefile (or write over the old one)
    w.save(output_shp)
    pass
def remove_shape_baseon_field_value(self, shape_file, out_shp, class_field_name, threashold, smaller=True):
    """
    remove features from shapefile based on the field value,
    if smaller is true, then the value smaller than threashold will be removed
    if smaller is False, then the value greater than threashold will be remove
    :param shape_file: input shape file
    :param out_shp: saved shape file
    :param class_field_name: the name of class field, such as area
    :param threashold: threashold value
    :param smaller: if smaller is true, then the value smaller than threashold will be removed,
    :return: True if successful, False otherwise
    """
    if io_function.is_file_exist(shape_file) is False:
        return False
    try:
        org_obj = shapefile.Reader(shape_file)
    except:
        # NOTE(review): bare except; str(IOError) logs the class name, not the error
        basic.outputlogMessage(str(IOError))
        return False

    # Create a new shapefile in memory (legacy pyshp 1.x Writer API)
    w = shapefile.Writer()
    w.shapeType = org_obj.shapeType

    org_records = org_obj.records()
    if (len(org_records) < 1):
        basic.outputlogMessage('error, no record in shape file ')
        return False

    # Copy over the geometry without any changes
    w.fields = list(org_obj.fields)
    # locate the field column; False means the field was not found
    field_index = self.__find_field_index(w.fields, class_field_name)
    if field_index is False:
        return False
    shapes_list = org_obj.shapes()
    i = 0
    removed_count = 0
    if smaller is True:
        for i in range(0, len(shapes_list)):
            rec = org_records[i]
            if rec[field_index] < threashold:
                # remove the record which is smaller than threashold
                removed_count = removed_count + 1
                continue
            w._shapes.append(shapes_list[i])
            rec = org_records[i]
            w.records.append(rec)
    else:
        for i in range(0, len(shapes_list)):
            rec = org_records[i]
            if rec[field_index] > threashold:
                # remove the record which is greater than threashold
                removed_count = removed_count + 1
                continue
            w._shapes.append(shapes_list[i])
            rec = org_records[i]
            w.records.append(rec)
    basic.outputlogMessage('Remove polygons based on %s, total count: %d' % (class_field_name, removed_count))
    # w._shapes.extend(org_obj.shapes())
    # copy prj file so the output keeps the input's projection
    org_prj = os.path.splitext(shape_file)[0] + ".prj"
    out_prj = os.path.splitext(out_shp)[0] + ".prj"
    io_function.copy_file_to_dst(org_prj, out_prj, overwrite=True)
    w.save(out_shp)
    return True
def train_kfold_cross_val(multi_training_files_allPolygons, multi_training_files, k_value, test_num):
    """Run k-fold cross-validation training.

    For each training shapefile listed in multi_training_files_allPolygons,
    create (or reuse) a folder of k subset shapefiles, then for each fold:
    point multi_training_files at that fold's subsets, prepare exe_qtp.sh from
    the template, and run it unless results already exist in result_backup.

    Relies on module-level names: trained_model_dir, log, print2file,
    create_shp_subset_polygons, repalce_string_in_file.
    """
    ##################################################################
    # get subset of polygons
    training_shp_all = []
    with open(multi_training_files_allPolygons, 'r') as f_obj:
        training_lines = f_obj.readlines()
        for line in training_lines:
            line = line.strip()
            training_shp_all.append(line.split(':')[-1])  # the last one is the shape file

    for training_shpAll in training_shp_all:
        dir = os.path.dirname(training_shpAll)
        file_name = os.path.basename(training_shpAll)
        file_name_no_ext = os.path.splitext(file_name)[0]
        dir_sub = os.path.join(dir, '%s_%d-fold_cross_val_t%d' % (file_name_no_ext, k_value, test_num))
        if os.path.isdir(dir_sub) is False:
            # will save to dir_sub}
            io_function.mkdir(dir_sub)
            create_shp_subset_polygons(dir_sub, training_shpAll, file_name, k_value)
        else:
            # check shape file existence
            sub_shps = io_function.get_file_list_by_pattern(dir_sub, '*.shp')
            if len(sub_shps) == k_value:
                print2file(log, "subset of shapefile already exist, skip creating new")
            else:
                create_shp_subset_polygons(dir_sub, training_shpAll, file_name, k_value)

    ##################################################################
    # training on k subset
    for idx in range(1, k_value + 1):
        # remove previous trained model (the setting are the same to exp10)
        if os.path.isdir(trained_model_dir):
            io_function.delete_file_or_dir(trained_model_dir)
        print2file(log, "run training and inference of the %d_th fold" % idx)

        # replace shape file path in "multi_training_files":
        # start from the full-polygon list, then substitute each shapefile
        # path with the path of this fold's subset
        io_function.copy_file_to_dst(multi_training_files_allPolygons, multi_training_files, overwrite=True)
        # replace shape file path in multi_training_files
        for training_shpAll in training_shp_all:
            dir = os.path.dirname(training_shpAll)
            file_name_no_ext = os.path.splitext(os.path.basename(training_shpAll))[0]
            dir_sub = os.path.join(dir, '%s_%d-fold_cross_val_t%d' % (file_name_no_ext, k_value, test_num))
            new_shp_path = os.path.join(dir_sub, '%s_%dfold_%d.shp' % (file_name_no_ext, k_value, idx))
            repalce_string_in_file(multi_training_files, training_shpAll, new_shp_path)

        # modify exe.sh: tag this run as '<k>fold_<idx>_t<test_num>'
        io_function.copy_file_to_dst('exe_template_kfold.sh', 'exe_qtp.sh', overwrite=True)
        new_line = '%dfold_%d_t%d' % (k_value, idx, test_num)
        repalce_string_in_file('exe_qtp.sh', 'x_test_num', new_line)

        # check results existence
        result_shp = io_function.get_file_list_by_pattern('result_backup', '*' + new_line + '*/*.shp')
        if len(result_shp) > 0:
            print2file(log, "results of test: %s already exist, skip" % new_line)
        else:
            # run training
            print2file(log, "start: test:%d the %d_th fold" % (test_num, idx))
            argslist = ['./exe_qtp.sh']
            return_code = basic.exec_command_args_list(argslist)
            # exit code is not 0, means something wrong, then quit
            if return_code != 0:
                sys.exit(return_code)
    pass
def convert_planet_to_rgb_images(tif_path, save_dir='RGB_images', sr_min=0, sr_max=3000,
                                 save_org_dir=None, sharpen=True, rgb_nodata=0):
    """Convert a Planet image to an 8-bit RGB GeoTIFF using gdal command-line tools.

    Pipeline: scale to 8-bit (fixed range for '*SR.tif', percentile stretch
    otherwise) -> reorder bands 3,2,1 to RGB -> optional sharpening via the
    external `prePlanetImage` script -> set nodata -> delete intermediates.

    :param tif_path: path to the input Planet GeoTIFF
    :param save_dir: output folder for the RGB image (created if missing)
    :param sr_min: lower bound of the fixed surface-reflectance scaling range
    :param sr_max: upper bound of the fixed surface-reflectance scaling range
    :param save_org_dir: if not None, also copy the original image there
    :param sharpen: if True, run the `prePlanetImage` sharpening script
    :param rgb_nodata: nodata value written into the output metadata
    :return: path of the final 8-bit RGB file (exits the process on command failure)

    NOTE(review): intermediate files '<name>_8bit.tif' / '<name>_8bit_rgb.tif'
    are written to the CURRENT working directory, so concurrent processes on
    the same image can clash (the original comment below acknowledges this).
    """
    # if multiple processes try to derive the same rgb images, it may have problem.
    # save output to 'RGB_images' + processID
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)
    if save_org_dir is not None and os.path.isdir(save_org_dir) is False:
        io_function.mkdir(save_org_dir)

    if save_org_dir is not None:
        copied_org_img_path = os.path.join(save_org_dir, os.path.basename(tif_path))
        io_function.copy_file_to_dst(tif_path, copied_org_img_path)

    # filename_no_ext
    output = os.path.splitext(os.path.basename(tif_path))[0]
    if sharpen:
        fin_output = os.path.join(save_dir, output + '_8bit_rgb_sharpen.tif')
    else:
        fin_output = os.path.join(save_dir, output + '_8bit_rgb.tif')
    if os.path.isfile(fin_output):
        basic.outputlogMessage(
            "Skip, because File %s exists in current folder: %s" % (fin_output, os.getcwd()))
        return fin_output

    # use fix min and max to make the color be consistent to sentinel-images
    src_min = sr_min
    src_max = sr_max
    dst_min = 1  # 0 is the nodata, so set as 1
    dst_max = 255

    # gdal_translate -ot Byte -scale ${src_min} ${src_max} ${dst_min} ${dst_max} ${image_path} ${output}_8bit.tif
    if 'SR.tif' in tif_path:
        # surface-reflectance product: fixed linear scaling
        cmd_str = 'gdal_translate -ot Byte -scale %d %d %d %d -of VRT %s %s_8bit.tif' % (
            src_min, src_max, dst_min, dst_max, tif_path, output)
    else:
        # gdal_contrast_stretch -percentile-range 0.01 0.99 ${output}.tif ${output}_8bit.tif
        cmd_str = 'gdal_contrast_stretch -percentile-range 0.01 0.99 %s %s_8bit.tif' % (
            tif_path, output)
    status, result = basic.exec_command_string(cmd_str)
    if status != 0:
        print(result)
        sys.exit(status)

    # the third band is red, second is green, and first is blue
    # gdal_translate -b 3 -b 2 -b 1 ${output}_8bit.tif ${output}_8bit_rgb.tif
    cmd_str = 'gdal_translate -b 3 -b 2 -b 1 -of VRT %s_8bit.tif %s_8bit_rgb.tif' % (
        output, output)
    status, result = basic.exec_command_string(cmd_str)
    if status != 0:
        print(result)
        sys.exit(status)

    # python ${code_dir}/planetScripts/prePlanetImage.py ${output}_8bit_rgb.tif ${fin_output}
    if sharpen:
        # `prePlanetImage` is a module-level path to the sharpening script
        cmd_str = 'python %s %s_8bit_rgb.tif %s' % (prePlanetImage, output, fin_output)
    else:
        # convert from VRT format to tif format
        cmd_str = 'gdal_translate -of GTiff %s_8bit_rgb.tif %s' % (output, fin_output)
    status, result = basic.exec_command_string(cmd_str)
    if status != 0:
        print(result)
        sys.exit(status)

    # set nodata
    # gdal_edit.py -a_nodata 0 ${fin_output}
    cmd_str = 'gdal_edit.py -a_nodata %d %s' % (rgb_nodata, fin_output)
    status, result = basic.exec_command_string(cmd_str)
    if status != 0:
        print(result)
        sys.exit(status)

    # clean up intermediate files from the current working directory
    io_function.delete_file_or_dir('%s_8bit.tif' % output)
    io_function.delete_file_or_dir('%s_8bit_rgb.tif' % output)

    return fin_output
def train_evaluation_deeplab(WORK_DIR, deeplab_dir, expr_name, para_file, network_setting_ini, gpu_num):
    """Train a DeepLab model and evaluate it, alternating train/eval every
    `validation_interval` epochs (or once at the end if no interval is set),
    then plot and back up mIOU / loss / learning-rate records.

    :param WORK_DIR: working directory containing 'tfrecord' and the experiment folder
    :param deeplab_dir: directory containing DeepLab's train.py / eval.py
    :param expr_name: experiment name; also the experiment folder name
    :param para_file: main parameter ini file
    :param network_setting_ini: network-specific parameter ini file
    :param gpu_num: number of GPUs to use for training
    :return: True if training was already complete; otherwise None after
        training/evaluation/backup finishes (exits or raises on failure)
    """
    # prepare training folder
    EXP_FOLDER = expr_name
    INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models')
    TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train')
    EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval')
    VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis')
    EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export')
    io_function.mkdir(INIT_FOLDER)
    io_function.mkdir(TRAIN_LOGDIR)
    io_function.mkdir(EVAL_LOGDIR)
    io_function.mkdir(VIS_LOGDIR)
    io_function.mkdir(EXPORT_DIR)

    # prepare the tensorflow check point (pretrained model) for training
    pre_trained_dir = parameters.get_directory_None_if_absence(
        network_setting_ini, 'pre_trained_model_folder')
    pre_trained_tar = parameters.get_string_parameters(network_setting_ini, 'TF_INIT_CKPT')
    pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar)
    if os.path.isfile(pre_trained_path) is False:
        print('pre-trained model: %s not exist, try to download' % pre_trained_path)
        # try to download the file
        pre_trained_url = parameters.get_string_parameters_None_if_absence(
            network_setting_ini, 'pre_trained_model_url')
        res = os.system('wget %s ' % pre_trained_url)
        if res != 0:
            sys.exit(1)
        io_function.movefiletodir(pre_trained_tar, pre_trained_dir)

    # unpack pre-trained model to INIT_FOLDER
    os.chdir(INIT_FOLDER)
    res = os.system('tar -xf %s' % pre_trained_path)
    if res != 0:
        raise IOError('failed to unpack %s' % pre_trained_path)
    os.chdir(WORK_DIR)

    dataset_dir = os.path.join(WORK_DIR, 'tfrecord')
    batch_size = parameters.get_digit_parameters(network_setting_ini, 'batch_size', 'int')
    # maximum iteration number
    iteration_num = parameters.get_digit_parameters(network_setting_ini, 'iteration_num', 'int')
    base_learning_rate = parameters.get_digit_parameters(
        network_setting_ini, 'base_learning_rate', 'float')

    # optional (may be None) network hyper-parameters
    train_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_output_stride', 'int')
    train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates1', 'int')
    train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates2', 'int')
    train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates3', 'int')

    inf_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_output_stride', 'int')
    inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates1', 'int')
    inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates2', 'int')
    inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates3', 'int')

    # depth_multiplier default is 1.0.
    depth_multiplier = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'depth_multiplier', 'float')

    decoder_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'decoder_output_stride', 'int')
    aspp_convs_filters = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'aspp_convs_filters', 'int')

    train_script = os.path.join(deeplab_dir, 'train.py')
    train_split = os.path.splitext(
        parameters.get_string_parameters(para_file, 'training_sample_list_txt'))[0]
    model_variant = parameters.get_string_parameters(network_setting_ini, 'model_variant')
    checkpoint = parameters.get_string_parameters(network_setting_ini, 'tf_initial_checkpoint')
    init_checkpoint_files = io_function.get_file_list_by_pattern(INIT_FOLDER, checkpoint + '*')
    if len(init_checkpoint_files) < 1:
        raise IOError('No initial checkpoint in %s with pattern: %s' % (INIT_FOLDER, checkpoint))
    init_checkpoint = os.path.join(INIT_FOLDER, checkpoint)
    b_early_stopping = parameters.get_bool_parameters(para_file, 'b_early_stopping')
    b_initialize_last_layer = parameters.get_bool_parameters(para_file, 'b_initialize_last_layer')

    dataset = parameters.get_string_parameters(para_file, 'dataset_name')
    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(
        para_file, 'NUM_CLASSES_noBG', 'int')
    assert num_classes_noBG is not None
    if b_initialize_last_layer is True:
        # when reusing the pretrained classifier head, the class count must
        # match the pretrained model's (21 or 19 classes incl. background)
        if pre_trained_tar in pre_trained_tar_21_classes:
            print('warning, pretrained model %s is trained with 21 classes, set num_of_classes to 21'
                  % pre_trained_tar)
            num_classes_noBG = 20
        if pre_trained_tar in pre_trained_tar_19_classes:
            print('warning, pretrained model %s is trained with 19 classes, set num_of_classes to 19'
                  % pre_trained_tar)
            num_classes_noBG = 18
    num_of_classes = num_classes_noBG + 1

    image_crop_size = parameters.get_string_list_parameters(para_file, 'image_crop_size')
    # BUG FIX: the original condition (len != 2 AND both digits) almost never
    # fired, letting malformed values through; reject anything that is not
    # exactly two digit strings.
    if len(image_crop_size) != 2 or not image_crop_size[0].isdigit() or not image_crop_size[1].isdigit():
        raise ValueError('image_crop_size should be height,width')
    crop_size_str = ','.join(image_crop_size)

    evl_script = os.path.join(deeplab_dir, 'eval.py')
    evl_split = os.path.splitext(
        parameters.get_string_parameters(para_file, 'validation_sample_list_txt'))[0]
    max_eva_number = 1

    # validation interval (epoch)
    validation_interval = parameters.get_digit_parameters_None_if_absence(
        para_file, 'validation_interval', 'int')
    train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file)
    iter_per_epoch = math.ceil(train_count / batch_size)
    total_epoches = math.ceil(iteration_num / iter_per_epoch)
    already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
    if already_trained_iteration >= iteration_num:
        basic.outputlogMessage('Training already run %d iterations, skip'
                               % already_trained_iteration)
        return True
    if validation_interval is None:
        basic.outputlogMessage(
            'No input validation_interval, so training to %d, then evaluating in the end'
            % iteration_num)
        # run training
        train_deeplab(train_script, dataset, train_split, num_of_classes, base_learning_rate,
                      model_variant, init_checkpoint, TRAIN_LOGDIR, dataset_dir, gpu_num,
                      train_atrous_rates1, train_atrous_rates2, train_atrous_rates3,
                      train_output_stride, crop_size_str, batch_size, iteration_num,
                      depth_multiplier, decoder_output_stride, aspp_convs_filters,
                      b_initialize_last_layer)
        # run evaluation
        evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes, model_variant,
                           inf_atrous_rates1, inf_atrous_rates2, inf_atrous_rates3,
                           inf_output_stride, TRAIN_LOGDIR, EVAL_LOGDIR, dataset_dir,
                           crop_size_str, max_eva_number, depth_multiplier,
                           decoder_output_stride, aspp_convs_filters)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        get_loss_learning_rate_list(TRAIN_LOGDIR)
    else:
        basic.outputlogMessage(
            'training to the maximum iteration of %d, and evaluating every %d epoch(es)'
            % (iteration_num, validation_interval))
        for epoch in range(validation_interval, total_epoches + validation_interval,
                           validation_interval):
            to_iter_num = min(epoch * iter_per_epoch, iteration_num)
            if to_iter_num <= already_trained_iteration:
                continue
            basic.outputlogMessage(
                'training and evaluating to %d epoches (to iteration: %d)'
                % (epoch, to_iter_num))
            # run training
            train_deeplab(train_script, dataset, train_split, num_of_classes, base_learning_rate,
                          model_variant, init_checkpoint, TRAIN_LOGDIR, dataset_dir, gpu_num,
                          train_atrous_rates1, train_atrous_rates2, train_atrous_rates3,
                          train_output_stride, crop_size_str, batch_size, to_iter_num,
                          depth_multiplier, decoder_output_stride, aspp_convs_filters,
                          b_initialize_last_layer)
            # run evaluation
            evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes, model_variant,
                               inf_atrous_rates1, inf_atrous_rates2, inf_atrous_rates3,
                               inf_output_stride, TRAIN_LOGDIR, EVAL_LOGDIR, dataset_dir,
                               crop_size_str, max_eva_number, depth_multiplier,
                               decoder_output_stride, aspp_convs_filters)
            # get miou
            miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
            # save loss value to disk
            get_loss_learning_rate_list(TRAIN_LOGDIR)
            # check if need to early stopping
            if b_early_stopping:
                if len(miou_dict['overall']) >= 5:
                    # if the last five miou did not improve, then stop training
                    if np.all(np.diff(miou_dict['overall'][-5:]) < 0.005):  # 0.0001 (%0.01) # 0.5 %
                        basic.outputlogMessage(
                            'early stopping: stop training because overall miou did not improved in the last five evaluation'
                        )
                        output_early_stopping_message(TRAIN_LOGDIR)
                        break

    # plot mIOU, loss, and learning rate curves
    iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt')
    loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt')
    miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        iou_path, train_count=train_count, val_count=val_count, batch_size=batch_size)
    loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        loss_path, train_count=train_count, val_count=val_count, batch_size=batch_size)

    # backup miou and training_loss & learning rate
    test_id = os.path.basename(WORK_DIR) + '_' + expr_name
    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    if os.path.isdir(backup_dir) is False:
        io_function.mkdir(backup_dir)

    new_iou_name = os.path.join(backup_dir, test_id + '_' + os.path.basename(iou_path))
    io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True)
    miou_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(miou_curve_path))
    io_function.copy_file_to_dst(miou_curve_path, miou_curve_bakname, overwrite=True)

    loss_new_name = os.path.join(backup_dir, test_id + '_' + os.path.basename(loss_path))
    io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True)
    loss_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(loss_curve_path))
    io_function.copy_file_to_dst(loss_curve_path, loss_curve_bakname, overwrite=True)
def mask_dem_by_surface_water(crop_dem_list, extent_poly, extent_id, crop_tif_dir, o_res, process_num):
    """Mask out surface-water pixels in a list of cropped DEM strips.

    Steps: find water-mask tiles overlapping `extent_poly` (read from the
    module-level `mask_water_dir`), crop and mosaic them, polygonize the
    mosaic to a shapefile, then burn the water polygons (as nodata) into a
    copy of each DEM strip.

    :param crop_dem_list: list of cropped DEM strip paths to be masked
    :param extent_poly: polygon of the target extent (grid cell)
    :param extent_id: integer id of the extent/grid, used in output names
    :param crop_tif_dir: folder for cropped/mosaicked water-mask outputs
    :param o_res: output resolution for cropping the water-mask tiles
    :param process_num: thread count passed to the crop/mosaic helpers
    :return: list of masked DEM paths; None if no water mask covers the
        extent; False if mosaicking or polygonizing fails
        (NOTE: mixed return types — callers must check for None/False)
    """
    # get list of the ArcticDEM mosaic
    water_mask_tifs = io_function.get_file_list_by_ext('.tif', mask_water_dir, bsub_folder=False)
    water_mask_ext_polys = get_dem_tif_ext_polygons(water_mask_tifs)
    overlap_index = vector_gpd.get_poly_index_within_extent(water_mask_ext_polys, extent_poly)

    #### crop and mosaic water mask
    sub_mosaic_dem_tifs = [water_mask_tifs[item] for item in overlap_index]
    water_mask_crop_tif_list = []
    for tif in sub_mosaic_dem_tifs:
        save_crop_path = os.path.join(
            crop_tif_dir,
            os.path.basename(io_function.get_name_by_adding_tail(tif, 'sub_poly_%d' % extent_id)))
        if os.path.isfile(save_crop_path):
            basic.outputlogMessage('%s exists, skip' % save_crop_path)
            water_mask_crop_tif_list.append(save_crop_path)
        else:
            crop_tif = subset_image_by_polygon_box(tif, save_crop_path, extent_poly,
                                                   resample_m='near', o_format='VRT',
                                                   out_res=o_res, same_extent=True,
                                                   thread_num=process_num)  #
            if crop_tif is False:
                raise ValueError('warning, crop %s failed' % tif)
            water_mask_crop_tif_list.append(crop_tif)
    if len(water_mask_crop_tif_list) < 1:
        basic.outputlogMessage('No water mask for %d grid' % extent_id)
        save_id_grid_no_watermask(extent_id)
        return None

    # create mosaic, can handle only input one file, but is slow
    save_water_mask_mosaic = os.path.join(crop_tif_dir, 'global_surface_water_grid%d.tif' % extent_id)
    result = RSImageProcess.mosaic_crop_images_gdalwarp(
        water_mask_crop_tif_list, save_water_mask_mosaic,
        resampling_method='average', o_format='GTiff',
        compress='lzw', tiled='yes', bigtiff='if_safer', thread_num=process_num)
    if result is False:
        return False

    # because the resolution of dem and water mask is different, so we polygonize the watermask,
    # then burn into the dem
    water_mask_shp = os.path.join(crop_tif_dir, 'global_surface_water_grid%d.shp' % extent_id)
    if os.path.isfile(water_mask_shp):
        basic.outputlogMessage('%s exists, skip cropping' % water_mask_shp)
    else:
        # set 0 as nodata
        if raster_io.set_nodata_to_raster_metadata(save_water_mask_mosaic, 0) is False:
            return False
        if vector_gpd.raster2shapefile(save_water_mask_mosaic, water_mask_shp, connect8=True) is None:
            return False

    # masking the strip version of DEMs
    mask_dem_list = []
    for idx, strip_dem in enumerate(crop_dem_list):
        save_path = io_function.get_name_by_adding_tail(strip_dem, 'maskWater')
        if os.path.isfile(save_path):
            basic.outputlogMessage('%s exist, skip' % save_path)
            mask_dem_list.append(save_path)
            continue
        io_function.copy_file_to_dst(strip_dem, save_path, overwrite=True)
        nodata = raster_io.get_nodata(save_path)
        # NOTE(review): a failed burn silently drops this DEM from the
        # returned list (no error raised) — confirm callers expect that
        if raster_io.burn_polygon_to_raster_oneband(save_path, water_mask_shp, nodata) is False:
            continue
        mask_dem_list.append(save_path)

    return mask_dem_list
def copy_curc_job_files(sh_dir, work_dir, sh_list): for sh in sh_list: io_function.copy_file_to_dst(os.path.join(sh_dir, sh), os.path.join(work_dir, sh)) #, overwrite=True
def postProcess(para_file, inf_post_note, b_skip_getshp=False, test_id=None):
    """Post-process inference results: convert raster results to shapefiles,
    merge per-image shapefiles per region, add attributes, remove unwanted
    polygons, evaluate against ground truth, and back everything up.

    :param para_file: main parameter ini file (must exist in the cwd)
    :param inf_post_note: free-text note appended to backup file/folder names
    :param b_skip_getshp: if True, skip raster-to-shapefile conversion and
        merging (reuse existing merged shapefiles)
    :param test_id: id tying results to a training run; defaults to
        '<cwd-basename>_<expr_name>'
    :return: None (raises IOError/ValueError on missing inputs)
    """
    # test_id is the related to training

    if os.path.isfile(para_file) is False:
        raise IOError('File %s not exists in current folder: %s' % (para_file, os.getcwd()))

    # the test string in 'exe.sh'
    test_note = inf_post_note

    WORK_DIR = os.getcwd()
    SECONDS = time.time()  # wall-clock start, for the time-cost log at the end

    expr_name = parameters.get_string_parameters(para_file, 'expr_name')
    network_setting_ini = parameters.get_string_parameters(para_file, 'network_setting_ini')

    inf_dir = parameters.get_directory(para_file, 'inf_output_dir')
    if test_id is None:
        test_id = os.path.basename(WORK_DIR) + '_' + expr_name

    # get name of inference areas
    multi_inf_regions = parameters.get_string_list_parameters(para_file, 'inference_regions')

    # run post-processing parallel
    # max_parallel_postProc_task = 8

    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    io_function.mkdir(backup_dir)

    # loop each inference regions
    sub_tasks = []
    # group region ini files that observe the same area at the same time
    same_area_time_inis = group_same_area_time_observations(multi_inf_regions)
    region_eva_reports = {}
    for key in same_area_time_inis.keys():
        multi_observations = same_area_time_inis[key]
        area_name = parameters.get_string_parameters(
            multi_observations[0], 'area_name')  # they have the same name and time
        area_time = parameters.get_string_parameters(multi_observations[0], 'area_time')
        merged_shp_list = []
        # per-observation list of map rasters (None until filled below)
        map_raster_list_2d = [None] * len(multi_observations)
        for area_idx, area_ini in enumerate(multi_observations):
            area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
            area_save_dir, shp_pre, _ = get_observation_save_dir_shp_pre(
                inf_dir, area_name, area_time, area_remark, test_id)

            # get image list
            inf_image_dir = parameters.get_directory(area_ini, 'inf_image_dir')
            # it is ok consider a file name as pattern and pass it the following functions
            # to get file list
            inf_image_or_pattern = parameters.get_string_parameters(area_ini, 'inf_image_or_pattern')
            inf_img_list = io_function.get_file_list_by_pattern(inf_image_dir, inf_image_or_pattern)
            img_count = len(inf_img_list)
            if img_count < 1:
                raise ValueError(
                    'No image for inference, please check inf_image_dir and inf_image_or_pattern in %s'
                    % area_ini)

            merged_shp = os.path.join(WORK_DIR, area_save_dir, shp_pre + '.shp')
            if b_skip_getshp:
                pass
            else:
                # post image one by one
                result_shp_list = []
                map_raster_list = []
                for img_idx, img_path in enumerate(inf_img_list):
                    # NOTE(review): img_path itself is not passed — the helper
                    # presumably locates results by img_idx; confirm
                    out_shp, out_raster = inf_results_to_shapefile(
                        WORK_DIR, img_idx, area_save_dir, test_id)
                    if out_shp is None or out_raster is None:
                        continue
                    result_shp_list.append(os.path.join(WORK_DIR, out_shp))
                    map_raster_list.append(out_raster)
                # merge shapefiles
                if merge_shape_files(result_shp_list, merged_shp) is False:
                    continue
                map_raster_list_2d[area_idx] = map_raster_list

            merged_shp_list.append(merged_shp)

        if b_skip_getshp is False:
            # add occurrence to each polygons
            get_occurence_for_multi_observation(merged_shp_list)

        for area_idx, area_ini in enumerate(multi_observations):
            area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
            area_save_dir, shp_pre, area_remark_time = get_observation_save_dir_shp_pre(
                inf_dir, area_name, area_time, area_remark, test_id)

            merged_shp = os.path.join(WORK_DIR, area_save_dir, shp_pre + '.shp')
            if os.path.isfile(merged_shp) is False:
                print('Warning, %s not exist, skip' % merged_shp)
                continue

            # add attributes to shapefile
            # add_attributes_script = os.path.join(code_dir,'datasets', 'get_polygon_attributes.py')
            shp_attributes = os.path.join(WORK_DIR, area_save_dir, shp_pre + '_post_NOrm.shp')
            # add_polygon_attributes(add_attributes_script,merged_shp, shp_attributes, para_file, area_ini )
            add_polygon_attributes(merged_shp, shp_attributes, para_file, area_ini)

            # remove polygons
            # rm_polygon_script = os.path.join(code_dir,'datasets', 'remove_mappedPolygons.py')
            shp_post = os.path.join(WORK_DIR, area_save_dir, shp_pre + '_post.shp')
            # remove_polygons(rm_polygon_script,shp_attributes, shp_post, para_file)
            remove_polygons_main(shp_attributes, shp_post, para_file)

            # evaluate the mapping results
            # eval_shp_script = os.path.join(code_dir,'datasets', 'evaluation_result.py')
            out_report = os.path.join(WORK_DIR, area_save_dir, shp_pre + '_evaluation_report.txt')
            # evaluation_polygons(eval_shp_script, shp_post, para_file, area_ini,out_report)
            evaluation_polygons(shp_post, para_file, area_ini, out_report)

            ##### copy and backup files ######
            # copy files to result_backup; names include test_note when given
            if len(test_note) > 0:
                backup_dir_area = os.path.join(
                    backup_dir,
                    area_name + '_' + area_remark_time + '_' + test_id + '_' + test_note)
            else:
                backup_dir_area = os.path.join(
                    backup_dir, area_name + '_' + area_remark_time + '_' + test_id)
            io_function.mkdir(backup_dir_area)
            if len(test_note) > 0:
                bak_merged_shp = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, test_note]) + '.shp')
                bak_post_shp = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, 'post', test_note]) + '.shp')
                bak_eva_report = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, 'eva_report', test_note]) + '.txt')
                bak_area_ini = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, 'region', test_note]) + '.ini')
            else:
                bak_merged_shp = os.path.join(backup_dir_area, '_'.join([shp_pre]) + '.shp')
                bak_post_shp = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, 'post']) + '.shp')
                bak_eva_report = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, 'eva_report']) + '.txt')
                bak_area_ini = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, 'region']) + '.ini')

            io_function.copy_shape_file(merged_shp, bak_merged_shp)
            io_function.copy_shape_file(shp_post, bak_post_shp)
            if os.path.isfile(out_report):
                io_function.copy_file_to_dst(out_report, bak_eva_report, overwrite=True)
            io_function.copy_file_to_dst(area_ini, bak_area_ini, overwrite=True)

            # copy map raster
            b_backup_map_raster = parameters.get_bool_parameters_None_if_absence(
                area_ini, 'b_backup_map_raster')
            if b_backup_map_raster is True:
                if map_raster_list_2d[area_idx] is not None:
                    for map_tif in map_raster_list_2d[area_idx]:
                        bak_map_tif = os.path.join(backup_dir_area, os.path.basename(map_tif))
                        io_function.copy_file_to_dst(map_tif, bak_map_tif, overwrite=True)

            region_eva_reports[shp_pre] = bak_eva_report

    if len(test_note) > 0:
        bak_para_ini = os.path.join(backup_dir, '_'.join([test_id, 'para', test_note]) + '.ini')
        bak_network_ini = os.path.join(
            backup_dir, '_'.join([test_id, 'network', test_note]) + '.ini')
        bak_time_cost = os.path.join(
            backup_dir, '_'.join([test_id, 'time_cost', test_note]) + '.txt')
    else:
        bak_para_ini = os.path.join(backup_dir, '_'.join([test_id, 'para']) + '.ini')
        bak_network_ini = os.path.join(backup_dir, '_'.join([test_id, 'network']) + '.ini')
        bak_time_cost = os.path.join(backup_dir, '_'.join([test_id, 'time_cost']) + '.txt')
    io_function.copy_file_to_dst(para_file, bak_para_ini)
    io_function.copy_file_to_dst(network_setting_ini, bak_network_ini)
    if os.path.isfile('time_cost.txt'):
        io_function.copy_file_to_dst('time_cost.txt', bak_time_cost)

    # output the evaluation report to screen
    for key in region_eva_reports.keys():
        report = region_eva_reports[key]
        if os.path.isfile(report) is False:
            continue
        print('evaluation report for %s:' % key)
        os.system('head -n 7 %s' % report)

    # output evaluation report to table
    if len(test_note) > 0:
        out_table = os.path.join(
            backup_dir, '_'.join([test_id, 'accuracy_table', test_note]) + '.xlsx')
    else:
        out_table = os.path.join(backup_dir, '_'.join([test_id, 'accuracy_table']) + '.xlsx')
    eva_reports = [
        region_eva_reports[key] for key in region_eva_reports
        if os.path.isfile(region_eva_reports[key])
    ]
    eva_report_to_tables.eva_reports_to_table(eva_reports, out_table)

    duration = time.time() - SECONDS
    os.system(
        'echo "$(date): time cost of post-procesing: %.2f seconds">>time_cost.txt' % duration)
def get_subimages_SpaceNet(input_image_dir, image_pattern, input_polygon_dir, polygon_pattern,
                           subImage_dir, subLabel_dir, process_num=1, burn_value=1,
                           b_no_label_image=False):
    """Create label rasters for SpaceNet sub-images and copy the sub-images,
    appending 'image:label' pairs to 'sub_images_labels_list.txt'.

    :param input_image_dir: directory containing the sub-images
    :param image_pattern: glob pattern for the sub-images
    :param input_polygon_dir: directory containing the polygon (geojson) files
    :param polygon_pattern: glob pattern for the polygon files
    :param subImage_dir: destination folder for copied sub-images (created if missing)
    :param subLabel_dir: destination folder for label rasters (created if missing)
    :param process_num: reserved for parallelism (currently unused here)
    :param burn_value: pixel value burned for polygons in the label raster
    :param b_no_label_image: if True, skip label creation entirely (then no
        pairs are written, matching the original behavior)
    :return: True on success, False if no images or no polygon files found
    """
    sub_images_list = io_function.get_file_list_by_pattern(input_image_dir, image_pattern)
    if len(sub_images_list) < 1:
        basic.outputlogMessage('No sub-images in: %s with pattern: %s'
                               % (input_image_dir, image_pattern))
        return False

    sub_images_count = len(sub_images_list)
    # do we need to check the projection of each sub-images?

    if os.path.isdir(subLabel_dir) is False:
        io_function.mkdir(subLabel_dir)
    if os.path.isdir(subImage_dir) is False:
        io_function.mkdir(subImage_dir)

    # BUG FIX: keep label_path_list aligned index-for-index with
    # sub_images_list (None for images without a label). Previously, skipped
    # images were simply not appended, so zip() paired later images with the
    # wrong label files.
    label_path_list = []
    if b_no_label_image is True:
        pass
    else:
        # polygon file list
        polygon_files_list = io_function.get_file_list_by_pattern(input_polygon_dir, polygon_pattern)
        if len(polygon_files_list) < 1:
            basic.outputlogMessage('No polygon files in: %s with pattern: %s'
                                   % (input_polygon_dir, polygon_pattern))
            return False
        polygon_name_list = [os.path.basename(item) for item in polygon_files_list]

        # create label images
        for idx, tif_path in enumerate(sub_images_list):
            print('%d / %d create label raster for %s' % (idx, sub_images_count, tif_path))
            # find polygon file
            poly_path = find_corresponding_geojson_SpaceNet(tif_path, polygon_files_list,
                                                            polygon_name_list)
            if poly_path is None:
                print('Warning, cannot find corresponding polygon files')
                label_path_list.append(None)  # placeholder keeps alignment
                continue

            save_path = os.path.join(subLabel_dir,
                                     io_function.get_name_no_ext(poly_path) + '.tif')
            if os.path.isfile(save_path):
                print('warning, %s already exists, skip' % save_path)
                label_path_list.append(save_path)
                continue
            if rasterize_polygons_to_ref_raster(tif_path, poly_path, burn_value, None,
                                                save_path, datatype='Byte',
                                                ignore_edge=True) is True:
                label_path_list.append(save_path)
            else:
                label_path_list.append(None)  # rasterization failed

    # copy sub-images, adding to txt files
    with open('sub_images_labels_list.txt', 'a') as f_obj:
        for tif_path, label_file in zip(sub_images_list, label_path_list):
            if label_file is None:
                # no label was produced for this image
                continue
            dst_subImg = os.path.join(subImage_dir, os.path.basename(tif_path))
            # copy sub-images
            io_function.copy_file_to_dst(tif_path, dst_subImg, overwrite=False)
            sub_image_label_str = dst_subImg + ":" + label_file + '\n'
            f_obj.writelines(sub_image_label_str)

    return True
def train_evaluation_deeplab_separate(WORK_DIR, deeplab_dir, expr_name, para_file, network_setting_ini, gpu_num): ''' in "train_evaluation_deeplab", run training, stop, then evaluation, then traininng, make learning rate strange, and the results worse. so in this function, we start two process, one for training, another for evaluation (run on CPU) ''' # prepare training folder EXP_FOLDER = expr_name INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models') TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train') EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval') VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis') EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export') io_function.mkdir(INIT_FOLDER) io_function.mkdir(TRAIN_LOGDIR) io_function.mkdir(EVAL_LOGDIR) io_function.mkdir(VIS_LOGDIR) io_function.mkdir(EXPORT_DIR) # prepare the tensorflow check point (pretrained model) for training pre_trained_dir = parameters.get_directory_None_if_absence( network_setting_ini, 'pre_trained_model_folder') pre_trained_tar = parameters.get_string_parameters(network_setting_ini, 'TF_INIT_CKPT') pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar) if os.path.isfile(pre_trained_path) is False: print('pre-trained model: %s not exist, try to download' % pre_trained_path) # try to download the file pre_trained_url = parameters.get_string_parameters_None_if_absence( network_setting_ini, 'pre_trained_model_url') res = os.system('wget %s ' % pre_trained_url) if res != 0: sys.exit(1) io_function.movefiletodir(pre_trained_tar, pre_trained_dir) # unpack pre-trained model to INIT_FOLDER os.chdir(INIT_FOLDER) res = os.system('tar -xf %s' % pre_trained_path) if res != 0: raise IOError('failed to unpack %s' % pre_trained_path) os.chdir(WORK_DIR) dataset_dir = os.path.join(WORK_DIR, 'tfrecord') batch_size = parameters.get_digit_parameters(network_setting_ini, 'batch_size', 'int') # maximum iteration number iteration_num = 
parameters.get_digit_parameters(network_setting_ini, 'iteration_num', 'int') base_learning_rate = parameters.get_digit_parameters( network_setting_ini, 'base_learning_rate', 'float') train_output_stride = parameters.get_digit_parameters_None_if_absence( network_setting_ini, 'train_output_stride', 'int') train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence( network_setting_ini, 'train_atrous_rates1', 'int') train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence( network_setting_ini, 'train_atrous_rates2', 'int') train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence( network_setting_ini, 'train_atrous_rates3', 'int') inf_output_stride = parameters.get_digit_parameters_None_if_absence( network_setting_ini, 'inf_output_stride', 'int') inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence( network_setting_ini, 'inf_atrous_rates1', 'int') inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence( network_setting_ini, 'inf_atrous_rates2', 'int') inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence( network_setting_ini, 'inf_atrous_rates3', 'int') # depth_multiplier default is 1.0. 
depth_multiplier = parameters.get_digit_parameters_None_if_absence( network_setting_ini, 'depth_multiplier', 'float') decoder_output_stride = parameters.get_digit_parameters_None_if_absence( network_setting_ini, 'decoder_output_stride', 'int') aspp_convs_filters = parameters.get_digit_parameters_None_if_absence( network_setting_ini, 'aspp_convs_filters', 'int') train_script = os.path.join(deeplab_dir, 'train.py') train_split = os.path.splitext( parameters.get_string_parameters(para_file, 'training_sample_list_txt'))[0] model_variant = parameters.get_string_parameters(network_setting_ini, 'model_variant') checkpoint = parameters.get_string_parameters(network_setting_ini, 'tf_initial_checkpoint') init_checkpoint_files = io_function.get_file_list_by_pattern( INIT_FOLDER, checkpoint + '*') if len(init_checkpoint_files) < 1: raise IOError('No initial checkpoint in %s with pattern: %s' % (INIT_FOLDER, checkpoint)) init_checkpoint = os.path.join(INIT_FOLDER, checkpoint) b_early_stopping = parameters.get_bool_parameters(para_file, 'b_early_stopping') b_initialize_last_layer = parameters.get_bool_parameters( para_file, 'b_initialize_last_layer') dataset = parameters.get_string_parameters(para_file, 'dataset_name') num_classes_noBG = parameters.get_digit_parameters_None_if_absence( para_file, 'NUM_CLASSES_noBG', 'int') assert num_classes_noBG != None if b_initialize_last_layer is True: if pre_trained_tar in pre_trained_tar_21_classes: print( 'warning, pretrained model %s is trained with 21 classes, set num_of_classes to 21' % pre_trained_tar) num_classes_noBG = 20 if pre_trained_tar in pre_trained_tar_19_classes: print( 'warning, pretrained model %s is trained with 19 classes, set num_of_classes to 19' % pre_trained_tar) num_classes_noBG = 18 num_of_classes = num_classes_noBG + 1 image_crop_size = parameters.get_string_list_parameters( para_file, 'image_crop_size') if len(image_crop_size) != 2 and image_crop_size[0].isdigit( ) and image_crop_size[1].isdigit(): raise 
ValueError('image_crop_size should be height,width') crop_size_str = ','.join(image_crop_size) # validation interval (epoch), do # validation_interval = parameters.get_digit_parameters_None_if_absence(para_file,'validation_interval','int') train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file) iter_per_epoch = math.ceil(train_count / batch_size) total_epoches = math.ceil(iteration_num / iter_per_epoch) already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR) if already_trained_iteration >= iteration_num: basic.outputlogMessage('Training already run %d iterations, skip' % already_trained_iteration) return True save_interval_secs = 1200 # default is 1200 second for saving model save_summaries_secs = 600 # default is 600 second for saving summaries eval_interval_secs = save_interval_secs # default is 300 second for running evaluation, if no new saved model, no need to run evaluation? train_process = Process( target=train_deeplab, args=(train_script, dataset, train_split, num_of_classes, base_learning_rate, model_variant, init_checkpoint, TRAIN_LOGDIR, dataset_dir, gpu_num, train_atrous_rates1, train_atrous_rates2, train_atrous_rates3, train_output_stride, crop_size_str, batch_size, iteration_num, depth_multiplier, decoder_output_stride, aspp_convs_filters, b_initialize_last_layer)) train_process.start() time.sleep(60) # wait if train_process.exitcode is not None and train_process.exitcode != 0: sys.exit(1) # eval_process.start() # time.sleep(10) # wait # if eval_process.exitcode is not None and eval_process.exitcode != 0: # sys.exit(1) while True: # only run evaluation when there is new trained model already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR) miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes) basic.outputlogMessage( 'Already trained iteration: %d, latest evaluation at %d step' % (already_trained_iteration, miou_dict['step'][-1])) if already_trained_iteration > miou_dict['step'][-1]: # run evaluation and wait 
until it finished gpuid = "" # set gpuid to empty string, making evaluation run on CPU evl_script = os.path.join(deeplab_dir, 'eval.py') evl_split = os.path.splitext( parameters.get_string_parameters( para_file, 'validation_sample_list_txt'))[0] # max_eva_number = -1 # run as many evaluation as possible, --eval_interval_secs (default is 300 seconds) max_eva_number = 1 # only run once inside the while loop, use while loop to control multiple evaluation eval_process = Process( target=evaluation_deeplab, args=(evl_script, dataset, evl_split, num_of_classes, model_variant, inf_atrous_rates1, inf_atrous_rates2, inf_atrous_rates3, inf_output_stride, TRAIN_LOGDIR, EVAL_LOGDIR, dataset_dir, crop_size_str, max_eva_number, depth_multiplier, decoder_output_stride, aspp_convs_filters, gpuid, eval_interval_secs)) eval_process.start( ) # put Process inside while loop to avoid error: AssertionError: cannot start a process twice while eval_process.is_alive(): time.sleep(5) # check if need early stopping if b_early_stopping: print(datetime.now(), 'check early stopping') miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes) if 'overall' in miou_dict.keys() and len( miou_dict['overall']) >= 5: # if the last five miou did not improve, then stop training if np.all(np.diff(miou_dict['overall'][-5:]) < 0.005 ): # 0.0001 (%0.01) # 0.5 % basic.outputlogMessage( 'early stopping: stop training because overall miou did not improved in the last five evaluation' ) output_early_stopping_message(TRAIN_LOGDIR) # train_process.kill() # this one seems not working # subprocess pid different from ps output # https://stackoverflow.com/questions/4444141/subprocess-pid-different-from-ps-output # os.system('kill ' + str(train_process.pid)) # still not working. 
train_process.pid is not the one output by ps -aux # train_process.terminate() # Note that descendant processes of the process will not be terminated # train_process.join() # Wait until child process terminates with open('train_py_pid.txt', 'r') as f_obj: lines = f_obj.readlines() train_pid = int(lines[0].strip()) os.system('kill ' + str(train_pid)) basic.outputlogMessage( 'kill training processing with id: %d' % train_pid) break # this breaks the while loop, making that it may not evaluate on some new saved model. # if the evaluation step is less than saved model iteration, run another iteration again immediately already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR) miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes) if already_trained_iteration > miou_dict['step'][-1]: continue # if finished training if train_process.is_alive() is False: break # # if eval_process exit, then quit training as well # if eval_process.is_alive() is False and train_process.is_alive(): # train_process.kill() # break time.sleep(eval_interval_secs) # wait for next evaluation # save loss value to disk get_loss_learning_rate_list(TRAIN_LOGDIR) # get miou again miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes) # eval_process did not exit as expected, kill it again. 
# os.system('kill ' + str(eval_process.pid)) # get iou and backup iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt') loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt') patch_info = os.path.join(WORK_DIR, 'sub_images_patches_info.txt') # backup miou and training_loss & learning rate test_id = os.path.basename(WORK_DIR) + '_' + expr_name backup_dir = os.path.join(WORK_DIR, 'result_backup') if os.path.isdir(backup_dir) is False: io_function.mkdir(backup_dir) new_iou_name = os.path.join(backup_dir, test_id + '_' + os.path.basename(iou_path)) io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True) loss_new_name = os.path.join(backup_dir, test_id + '_' + os.path.basename(loss_path)) io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True) new_patch_info = os.path.join(backup_dir, test_id + '_' + os.path.basename(patch_info)) io_function.copy_file_to_dst(patch_info, new_patch_info, overwrite=True) # plot mIOU, loss, and learnint rate curves, and backup miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main( iou_path, train_count=train_count, val_count=val_count, batch_size=batch_size) loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main( loss_path, train_count=train_count, val_count=val_count, batch_size=batch_size) miou_curve_bakname = os.path.join( backup_dir, test_id + '_' + os.path.basename(miou_curve_path)) io_function.copy_file_to_dst(miou_curve_path, miou_curve_bakname, overwrite=True) loss_curve_bakname = os.path.join( backup_dir, test_id + '_' + os.path.basename(loss_curve_path)) io_function.copy_file_to_dst(loss_curve_path, loss_curve_bakname, overwrite=True)
def split_train_val(para_file):
    """Split the patch list 'list/trainval.txt' into training and validation lists.

    Reads the split settings from *para_file*:
      - 'training_data_per' (float, optional): fraction of samples used for training.
        If absent, mimic the VOC convention: when there are fewer than 1449
        patches, train and validation lists are both the full 'trainval.txt'
        (so the "validation" score is really training accuracy); otherwise the
        full list is used for training and 1449 randomly chosen patches form
        the validation list.
      - 'training_sample_list_txt' / 'validation_sample_list_txt': output file
        names (written inside the 'list' directory).

    After splitting, saves brief info (image width/height and per-class sample
    counts) for both lists via get_image_with_height_list and
    get_sample_count_of_each_class.

    :param para_file: path of the parameter (ini) file; must exist in the
        current working directory.
    :raises IOError: if *para_file* does not exist.
    """
    print("split data set into training and validation")
    if os.path.isfile(para_file) is False:
        raise IOError('File %s not exists in current folder: %s' % (para_file, os.getcwd()))

    # make the repository root importable, then load the project's parameter reader
    code_dir = os.path.join(os.path.dirname(sys.argv[0]), '..')
    sys.path.insert(0, code_dir)
    import parameters

    training_data_per = parameters.get_digit_parameters_None_if_absence(para_file, 'training_data_per', 'float')
    train_sample_txt = parameters.get_string_parameters(para_file, 'training_sample_list_txt')
    val_sample_txt = parameters.get_string_parameters(para_file, 'validation_sample_list_txt')

    # 'list' is the conventional folder holding the patch file lists
    list_dir = 'list'  # renamed from 'dir' to avoid shadowing the builtin
    all_img_list = os.path.join(list_dir, 'trainval.txt')

    if training_data_per is None:
        # similar to VOC dataset, we only use 1449 images for validation (the
        # same data is also used for training, so this measures training
        # accuracy, not true validation accuracy)
        with open(all_img_list, 'r') as f_obj:
            file_names = f_obj.readlines()
        if len(file_names) < 1449:
            # val.txt is identical to trainval.txt
            io_function.copy_file_to_dst(all_img_list, os.path.join(list_dir, train_sample_txt))
            io_function.copy_file_to_dst(all_img_list, os.path.join(list_dir, val_sample_txt))
        else:
            io_function.copy_file_to_dst(all_img_list, os.path.join(list_dir, train_sample_txt))
            # randomly pick 1449 patches from trainval.txt for validation
            import random
            sel_file_index = random.sample(range(len(file_names)), 1449)  # indices without duplicates
            with open(os.path.join(list_dir, val_sample_txt), 'w') as w_obj:
                sel_file_names = [file_names[item] for item in sel_file_index]
                w_obj.writelines(sel_file_names)
    else:
        # split training and validation datasets by the requested percentage
        Do_shuffle = True
        from datasets.train_test_split import train_test_split_main
        train_test_split_main(all_img_list, training_data_per, Do_shuffle, train_sample_txt, val_sample_txt)

    # save brief information of image patches (width/height per image)
    img_ext = parameters.get_string_parameters_None_if_absence(para_file, 'split_image_format')
    get_image_with_height_list(os.path.join(list_dir, train_sample_txt), img_ext, info_type='training')
    get_image_with_height_list(os.path.join(list_dir, val_sample_txt), img_ext, info_type='validation')

    # save the count of each class in training and validation
    get_sample_count_of_each_class(os.path.join(list_dir, train_sample_txt), info_type='training')
    get_sample_count_of_each_class(os.path.join(list_dir, val_sample_txt), info_type='validation')