def create_shp_subset_polygons(dir_sub, training_shpAll, file_name, k_value):
    """Run the training-polygon subset script from inside ``dir_sub``.

    The process working directory is switched to ``dir_sub`` while the
    external command runs and is always restored afterwards, even when the
    command (or building its argument list) raises.

    :param dir_sub: existing folder to run the script in
    :param training_shpAll: shapefile path passed as the script's first argument
    :param file_name: file name passed as the script's second argument
    :param k_value: value passed to the script through its ``-k`` option
    :return: True; the exit status of the script is not inspected here
    """
    curr_dir = os.getcwd()

    # change the running folder
    os.chdir(dir_sub)
    try:
        args_list = [
            get_training_polygons_script, training_shpAll, file_name, '-k',
            str(k_value)
        ]
        basic.exec_command_args_list(args_list)
    finally:
        # change back, whatever happened above, so that callers relying on
        # relative paths are not left in dir_sub
        os.chdir(curr_dir)

    return True
def unpack_tar_gz_file(file_path, work_dir):
    '''
    Unpack a *.tar.gz package with ``tar -zxvf`` into a sub-folder of work_dir.

    The sub-folder is named after the archive with its ``.tar.gz`` suffix
    removed. If that folder already exists and holds more than one entry,
    unpacking is skipped.

    :param file_path: path of the archive; must end with ``.tar.gz``
    :param work_dir: existing directory under which the destination folder is created
    :return: the destination folder (``work_dir`` joined with the archive
        base name), or False if ``tar`` returns a non-zero exit code
    :raises IOError: if ``work_dir`` is not an existing directory
    :raises ValueError: if ``file_path`` does not end with ``.tar.gz``
    '''
    if not os.path.isdir(work_dir):
        raise IOError('dir %s not exist' % os.path.abspath(work_dir))

    if not file_path.endswith('.tar.gz'):
        # the offending path is now actually interpolated into the message
        raise ValueError('input %s do not end with .tar.gz' % file_path)

    # strip the '.tar.gz' suffix (7 characters) to name the output folder
    file_basename = os.path.basename(file_path)[:-len('.tar.gz')]
    dst_folder = os.path.join(work_dir, file_basename)

    # "> 1" rather than "> 0": on Mac, .DS_Store count one file.
    if os.path.isdir(dst_folder) and len(os.listdir(dst_folder)) > 1:
        basic.outputlogMessage('%s exists and is not empty, skip unpacking' % dst_folder)
        return dst_folder

    mkdir(dst_folder)

    args_list = ['tar', '-zxvf', file_path, '-C', dst_folder]
    returncode = basic.exec_command_args_list(args_list)
    if returncode != 0:
        return False

    return dst_folder
def decompress_gz_file(file_path, work_dir, bkeepmidfile):
    """
    Decompress a gz-compressed tar package in two steps (gzip, then tar).

    Neither external command's exit status is checked.

    Args:
        file_path: the path of gz file
        work_dir: existing directory under which the output folder is created
        bkeepmidfile: indicate whether keep the middle file (eg *.tar file);
            the middle file is removed only when this is exactly ``False``

    Returns: the absolute path of a folder which contains the decompressed
        files, or False if ``work_dir`` is not an existing directory

    """
    if not os.path.isdir(work_dir):
        basic.outputlogMessage('dir %s not exist' % os.path.abspath(work_dir))
        return False

    # NOTE(review): only the part of the file name before the FIRST '.' is
    # kept, so an archive such as "a.b.tar.gz" maps to "a.tar" here, which is
    # presumably not the name gzip writes -- confirm before using such names.
    file_basename = os.path.basename(file_path).split('.')[0]
    # the intermediate tar file is expected next to the input file
    file_tar = os.path.join(os.path.dirname(file_path), file_basename + ".tar")

    # decompression file and keep it
    args_list = ['gzip', '-dk', file_path]
    if os.path.isfile(file_tar):
        # the tar path is now actually interpolated into the message
        basic.outputlogMessage('%s already exist' % file_tar)
    else:
        basic.exec_command_args_list(args_list)

    # unpack the tar file into <work_dir>/<file_basename>
    dst_folder = os.path.join(os.path.abspath(work_dir), file_basename)
    mkdir(dst_folder)
    args_list = ['tar', '-xvf', file_tar, '-C', dst_folder]
    basic.exec_command_args_list(args_list)

    if bkeepmidfile is False:
        os.remove(file_tar)

    return dst_folder
def _kfold_subset_dir(training_shp, k_value, test_num):
    """Return the folder, next to ``training_shp``, holding its k-fold subsets."""
    shp_dir = os.path.dirname(training_shp)
    name_no_ext = os.path.splitext(os.path.basename(training_shp))[0]
    return os.path.join(
        shp_dir,
        '%s_%d-fold_cross_val_t%d' % (name_no_ext, k_value, test_num))


def train_kfold_cross_val(multi_training_files_allPolygons, multi_training_files, k_value, test_num):
    """Create k-fold polygon subsets and run training/inference on each fold.

    Step 1: for every shapefile listed in ``multi_training_files_allPolygons``
    (the text after the last ':' on each line), make sure a sub-folder with
    ``k_value`` subset shapefiles exists, creating it when needed.

    Step 2: for each fold, regenerate ``multi_training_files`` and
    ``exe_qtp.sh`` from their templates, pointing them at that fold's
    shapefiles, then execute ``./exe_qtp.sh`` unless matching results are
    already present under ``result_backup``.

    Lines that yield no shapefile path (blank lines, or lines ending with
    ':') are skipped.

    :param multi_training_files_allPolygons: text file listing the shapefiles
        that contain all training polygons
    :param multi_training_files: file overwritten for every fold with a copy
        of the list above in which the shapefile paths are replaced
    :param k_value: number of folds
    :param test_num: test number, used in folder names and in the run tag
    :return: None; calls ``sys.exit`` with the script's exit code if a run fails
    """
    ##################################################################
    # get subset of polygons
    training_shp_all = []
    with open(multi_training_files_allPolygons, 'r') as f_obj:
        for line in f_obj:
            # the last one is the shape file
            shp_path = line.strip().split(':')[-1]
            # an empty path would create a junk folder and later make the
            # string replacement below operate on '' -- skip it
            if shp_path:
                training_shp_all.append(shp_path)

    for training_shpAll in training_shp_all:
        file_name = os.path.basename(training_shpAll)
        dir_sub = _kfold_subset_dir(training_shpAll, k_value, test_num)

        if os.path.isdir(dir_sub):
            # check shape file existence
            sub_shps = io_function.get_file_list_by_pattern(dir_sub, '*.shp')
            if len(sub_shps) == k_value:
                print2file(
                    log,
                    "subset of shapefile already exist, skip creating new")
                continue
        else:
            # will save to dir_sub
            io_function.mkdir(dir_sub)

        create_shp_subset_polygons(dir_sub, training_shpAll, file_name,
                                   k_value)

    ##################################################################
    # training on k subset
    for idx in range(1, k_value + 1):
        # remove previous trained model (the setting are the same to exp10)
        if os.path.isdir(trained_model_dir):
            io_function.delete_file_or_dir(trained_model_dir)

        print2file(log, "run training and inference of the %d_th fold" % idx)

        # start from a fresh copy of the all-polygons list, then replace the
        # shape file paths in "multi_training_files" with this fold's subsets
        io_function.copy_file_to_dst(multi_training_files_allPolygons,
                                     multi_training_files,
                                     overwrite=True)
        for training_shpAll in training_shp_all:
            file_name_no_ext = os.path.splitext(
                os.path.basename(training_shpAll))[0]
            dir_sub = _kfold_subset_dir(training_shpAll, k_value, test_num)
            new_shp_path = os.path.join(
                dir_sub,
                '%s_%dfold_%d.shp' % (file_name_no_ext, k_value, idx))
            repalce_string_in_file(multi_training_files, training_shpAll,
                                   new_shp_path)

        # modify exe.sh
        io_function.copy_file_to_dst('exe_template_kfold.sh',
                                     'exe_qtp.sh',
                                     overwrite=True)
        new_line = '%dfold_%d_t%d' % (k_value, idx, test_num)
        repalce_string_in_file('exe_qtp.sh', 'x_test_num', new_line)

        # check results existence
        result_shp = io_function.get_file_list_by_pattern(
            'result_backup', '*' + new_line + '*/*.shp')
        if len(result_shp) > 0:
            print2file(log,
                       "results of test: %s already exist, skip" % new_line)
            continue

        # run training
        print2file(log, "start: test:%d the %d_th fold" % (test_num, idx))
        argslist = ['./exe_qtp.sh']
        return_code = basic.exec_command_args_list(argslist)
        # exit code is not 0, means something wrong, then quit
        if return_code != 0:
            sys.exit(return_code)