Example #1
def main(unused_argv):
    dataset_splits = []

    train_sample_txt = parameters.get_string_parameters_None_if_absence(para_file,'training_sample_list_txt')
    if train_sample_txt is not None:
        train_sample_txt = os.path.join(FLAGS.list_folder, train_sample_txt)
        if os.path.isfile(train_sample_txt) is False:
            raise IOError('%s does not exist' % train_sample_txt)
        dataset_splits.append(train_sample_txt)

    val_sample_txt = parameters.get_string_parameters_None_if_absence(para_file, 'validation_sample_list_txt')
    if val_sample_txt is not None:
        val_sample_txt = os.path.join(FLAGS.list_folder, val_sample_txt)
        if os.path.isfile(val_sample_txt) is False:
            raise IOError('%s does not exist' % val_sample_txt)
        dataset_splits.append(val_sample_txt)
    # else:
    #     raise ValueError('training_sample_list_txt or validation_sample_list_txt are not in %s'%para_file)


    # dataset_splits = [train_sample_txt,val_sample_txt]
    # dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*val.txt'))
    print(dataset_splits)


    for dataset_split in dataset_splits:
        _convert_dataset(dataset_split)
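
Note: these examples read settings through a custom parameters module (from the DeeplabforRS project) whose implementation is not shown here. Below is a minimal sketch of get_string_parameters_None_if_absence, assuming the para file is a plain-text file of "key = value" lines with '#' comments; the real module may differ in detail.

import os

def get_string_parameters_None_if_absence_sketch(para_file, name):
    # look up "name = value" in a plain-text parameter file; return None if the key is absent
    if os.path.isfile(para_file) is False:
        raise IOError('%s does not exist' % para_file)
    with open(para_file, 'r') as f_obj:
        for line in f_obj.readlines():
            line = line.split('#')[0].strip()   # drop inline comments
            if '=' not in line:
                continue
            key, value = [item.strip() for item in line.split('=', 1)]
            if key == name:
                return value
    return None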
Example #2
def get_file_path_parameter(parafile, multi_files, one_file):

    multi_files = parameters.get_string_parameters_None_if_absence(
        parafile, multi_files)
    if multi_files is None:
        one_file = parameters.get_string_parameters_None_if_absence(
            parafile, one_file)
    else:
        one_file = io_function.get_path_from_txt_list_index(multi_files)
        one_file = io_function.get_file_path_new_home_folder(one_file)
    return one_file
def remove_previous_data(para_file):

    print("remove previous data or results to run again")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in current folder: %s' %
                      (para_file, os.getcwd()))

    subImage_dir = parameters.get_string_parameters_None_if_absence(
        para_file, 'input_train_dir')
    subLabel_dir = parameters.get_string_parameters_None_if_absence(
        para_file, 'input_label_dir')

    if os.path.isdir(subImage_dir):
        io_function.delete_file_or_dir(subImage_dir)
        print('remove %s' % subImage_dir)
    if os.path.isdir(subLabel_dir):
        io_function.delete_file_or_dir(subLabel_dir)
        print('remove %s' % subLabel_dir)

    subImage_dir_delete = subImage_dir + '_delete'
    subLabel_dir_delete = subLabel_dir + '_delete'
    if os.path.isdir(subImage_dir_delete):
        io_function.delete_file_or_dir(subImage_dir_delete)
        print('remove %s' % subImage_dir_delete)
    if os.path.isdir(subLabel_dir_delete):
        io_function.delete_file_or_dir(subLabel_dir_delete)
        print('remove %s ' % subLabel_dir_delete)

    if os.path.isdir('split_images'):
        io_function.delete_file_or_dir('split_images')
        print('remove %s ' % 'split_images')
    if os.path.isdir('split_labels'):
        io_function.delete_file_or_dir('split_labels')
        print('remove %s ' % 'split_labels')

    images_including_aug = os.path.join('list', 'images_including_aug.txt')
    if os.path.isfile(images_including_aug):
        io_function.delete_file_or_dir(images_including_aug)
        print('remove %s ' % 'list/images_including_aug.txt')

    if os.path.isdir('tfrecord'):
        io_function.delete_file_or_dir('tfrecord')
        print('remove %s ' % 'tfrecord')

    if os.path.isfile('sub_images_labels_list.txt'):
        io_function.delete_file_or_dir('sub_images_labels_list.txt')
        print('remove %s ' % 'sub_images_labels_list.txt')

    if os.path.isfile('sub_images_labels_list_noGAN.txt'):
        io_function.delete_file_or_dir('sub_images_labels_list_noGAN.txt')
        print('remove %s ' % 'sub_images_labels_list_noGAN.txt')
def get_sub_images_from_prediction_results(para_file, polygons_shp,
                                           image_folder_or_path, image_pattern,
                                           saved_dir):

    class_names = parameters.get_string_list_parameters(
        para_file, 'object_names')

    dstnodata = parameters.get_digit_parameters(para_file, 'dst_nodata', 'int')
    bufferSize = parameters.get_digit_parameters(para_file, 'buffer_size',
                                                 'int')
    rectangle_ext = parameters.get_string_parameters_None_if_absence(
        para_file, 'b_use_rectangle')
    if rectangle_ext is not None:
        b_rectangle = True
    else:
        b_rectangle = False

    process_num = parameters.get_digit_parameters(para_file, 'process_num',
                                                  'int')

    get_sub_images_pixel_json_files(polygons_shp, image_folder_or_path,
                                    image_pattern, class_names, bufferSize,
                                    dstnodata, saved_dir, b_rectangle,
                                    process_num)

    pass
def split_train_val(para_file):
    print("split data set into training and validation")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in current folder: %s' %
                      (para_file, os.getcwd()))

    code_dir = os.path.join(os.path.dirname(sys.argv[0]), '..')
    sys.path.insert(0, code_dir)
    import parameters

    script = os.path.join(code_dir, 'datasets', 'train_test_split.py')

    training_data_per = parameters.get_digit_parameters_None_if_absence(
        para_file, 'training_data_per', 'float')
    train_sample_txt = parameters.get_string_parameters(
        para_file, 'training_sample_list_txt')
    val_sample_txt = parameters.get_string_parameters(
        para_file, 'validation_sample_list_txt')

    dir = 'list'
    all_img_list = os.path.join(dir, 'trainval.txt')

    # command_string = script + ' -p ' + str(training_data_per) + \
    #                  ' -t ' + train_sample_txt  + \
    #                  ' -v ' + val_sample_txt  + \
    #                  ' --shuffle ' + all_img_list
    # res = os.system(command_string)
    # if res!=0:
    #     sys.exit(1)

    Do_shuffle = True
    from datasets.train_test_split import train_test_split_main
    train_test_split_main(all_img_list, training_data_per, Do_shuffle,
                          train_sample_txt, val_sample_txt)

    # save brief information of image patches
    img_ext = parameters.get_string_parameters_None_if_absence(
        para_file, 'split_image_format')

    get_image_with_height_list(os.path.join(dir, train_sample_txt),
                               img_ext,
                               info_type='training')

    get_image_with_height_list(os.path.join(dir, val_sample_txt),
                               img_ext,
                               info_type='validation')

    # save the count of each class in training and validation
    get_sample_count_of_each_class(os.path.join(dir, train_sample_txt),
                                   info_type='training')

    get_sample_count_of_each_class(os.path.join(dir, val_sample_txt),
                                   info_type='validation')
def training_img_augment(para_file):

    print("start data augmentation")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in current folder: %s'%(para_file, os.getcwd()))

    # augscript = os.path.join(code_dir,'datasets','image_augment.py')

    img_ext = parameters.get_string_parameters_None_if_absence(para_file,'split_image_format')
    print("image format: %s"% img_ext)
    proc_num = parameters.get_digit_parameters(para_file, 'process_num', 'int')

    SECONDS=time.time()

    from datasets.image_augment import image_augment_main

    # augment training images
    print("image augmentation on image patches")
    img_list_aug_txt = 'list/images_including_aug.txt'
    # command_string = augscript + ' -p ' + para_file + ' -d ' + 'split_images' + ' -e ' + img_ext + ' -n ' + str(proc_num) + \
    #                  ' -o ' + 'split_images' + ' -l ' + img_list_aug_txt + ' ' + 'list/trainval.txt'
    # res = os.system(command_string)
    # if res!=0:
    #     sys.exit(1)
    image_augment_main(para_file,'list/trainval.txt',img_list_aug_txt,'split_images','split_images',img_ext,False,proc_num)

    # augment training labels
    print("image augmentation on label patches")
    # command_string = augscript + ' -p ' + para_file + ' -d ' + 'split_labels' + ' -e ' + img_ext + ' -n ' + str(proc_num) + \
    #                  ' -o ' + 'split_labels' + ' -l ' + img_list_aug_txt + ' ' + 'list/trainval.txt' + ' --is_ground_truth '
    #
    # res = os.system(command_string)
    # if res!=0:
    #     sys.exit(1)
    # save the result to the same file (redundant, they have the same filename)
    image_augment_main(para_file, 'list/trainval.txt', img_list_aug_txt, 'split_labels', 'split_labels', img_ext, True,proc_num)

    if os.path.isfile(img_list_aug_txt):
        os.system(' cp %s list/trainval.txt'%img_list_aug_txt)
        os.system(' cp %s list/val.txt'%img_list_aug_txt)
    else:
        print('list/images_including_aug.txt does not exist because no data augmentation strings were set')


    # output the number of image patches (ls may fail if there are a lot of files, so these two lines were removed)
    # os.system('echo "count of class 0 ":$(ls split_images/*class_0*${img_ext} |wc -l) >> time_cost.txt')
    # os.system('echo "count of class 1 ":$(ls split_images/*class_1*${img_ext} |wc -l) >> time_cost.txt')

    duration= time.time() - SECONDS
    os.system('echo "$(date): time cost of data augmentation: %.2f seconds">>time_cost.txt'%duration)
Example #7
def get_file_path_parameter(parafile, data_dir, data_name_or_pattern):

    data_dir = parameters.get_directory_None_if_absence(parafile, data_dir)
    data_name_or_pattern = parameters.get_string_parameters_None_if_absence(parafile, data_name_or_pattern)
    if data_dir is None or data_name_or_pattern is None:
        return None
    file_list = io_function.get_file_list_by_pattern(data_dir,data_name_or_pattern)

    if len(file_list) < 1:
        raise IOError('NO file in %s with name or pattern: %s'%(data_dir, data_name_or_pattern))
    if len(file_list) == 1:
        return file_list[0]
    else:
        # return multiple files
        return file_list
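
Note: io_function.get_file_list_by_pattern is used throughout these examples but not shown; a minimal glob-based sketch of the assumed behavior (return all files in a folder that match a name pattern):

import glob
import os

def get_file_list_by_pattern_sketch(folder, pattern):
    # e.g. get_file_list_by_pattern_sketch('subImages', '*.tif')
    return glob.glob(os.path.join(folder, pattern))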
Example #8
def main(options, args):
    input = args[0]
    val_path = options.validation_shp

    para_file = options.para_file
    if val_path is None:
        # read validation shp from the parameter file
        multi_val_files = parameters.get_string_parameters_None_if_absence(
            para_file, 'validation_shape_list')
        if multi_val_files is None:
            val_path = parameters.get_validation_shape()
        else:
            cwd_path = os.getcwd()
            if os.path.isfile(multi_val_files) is False:
                multi_val_files = os.path.join(
                    os.path.dirname(os.path.dirname(cwd_path)),
                    multi_val_files)
            with open(multi_val_files, 'r') as f_obj:
                lines = f_obj.readlines()
                lines = [item.strip() for item in lines]

            folder = os.path.basename(cwd_path)
            import re
            I_idx_str = re.findall(r'I\d+', folder)
            if len(I_idx_str) == 1:
                index = int(I_idx_str[0][1:])
            else:
                # try to find the image idx from file name
                file_name = os.path.basename(input)
                I_idx_str = re.findall(r'I\d+', file_name)
                if len(I_idx_str) == 1:
                    index = int(I_idx_str[0][1:])
                else:
                    raise ValueError(
                        'Cannot find the I* which represents the image index')

            val_path = lines[index]
            # try to change the home folder path if the file does not exist
            val_path = io_function.get_file_path_new_home_folder(val_path)

    if os.path.isfile(val_path) is False:
        raise IOError(
            'validation polygon (%s) does not exist, cannot save FP and FN polygons'
            % val_path)

    save_false_positve_and_false_negative(input, val_path, para_file)
Example #9
def main(options, args):
    input = args[0]

    # evaluation result
    multi_val_files = parameters.get_string_parameters_None_if_absence(
        '', 'validation_shape_list')
    if multi_val_files is None:
        val_path = parameters.get_validation_shape()
    else:
        val_path = io_function.get_path_from_txt_list_index(multi_val_files)
        # try to change the home folder path if the file does not exist
        val_path = io_function.get_file_path_new_home_folder(val_path)

    if os.path.isfile(val_path):
        basic.outputlogMessage(
            'Start evaluation, input: %s, validation file: %s' %
            (input, val_path))
        evaluation_result(input, val_path)
    else:
        basic.outputlogMessage(
            "warning, validation polygon (%s) not exist, skip evaluation" %
            val_path)
Example #10
def main(options, args):

    time_str = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    print2file(log, time_str)

    para_file = options.para_file
    k_value = int(args[0])
    test_num = int(args[1])

    print2file(log, 'kvalue : %d' % k_value)
    print2file(log, 'test_num : %d' % test_num)

    global trained_model_dir
    trained_model_dir = parameters.get_string_parameters(
        para_file, 'expr_name')

    # get the path of multi training polygons
    multi_training_files = parameters.get_string_parameters_None_if_absence(
        para_file, 'multi_training_files')
    if multi_training_files is None:
        raise ValueError('multi_training_files is not set in the %s' %
                         para_file)

    io_function.is_file_exist(multi_training_files)

    # back up the original training file which contains the full set of polygons
    training_files_allPolygons = io_function.get_name_by_adding_tail(
        multi_training_files, 'allPolygons')
    if os.path.isfile(training_files_allPolygons) is False:
        io_function.copy_file_to_dst(multi_training_files,
                                     training_files_allPolygons)
    else:
        basic.outputlogMessage(
            'The full set of polygons already exists')  # %multi_training_files

    # training using the k subsets
    train_kfold_cross_val(training_files_allPolygons, multi_training_files,
                          k_value, test_num)
Example #11
para_file = sys.argv[1]
if os.path.isfile(para_file) is False:
    raise IOError('File %s does not exist in current folder: %s' %
                  (para_file, os.getcwd()))

deeplabRS = os.path.expanduser('~/codes/PycharmProjects/DeeplabforRS')
sys.path.insert(0, deeplabRS)

import parameters
import basic_src.io_function as io_function

eo_dir = os.path.expanduser("~/codes/PycharmProjects/Landuse_DL")
get_subImage_script = os.path.join(eo_dir, 'sentinelScripts',
                                   'get_subImages.py')

multi_training_files = parameters.get_string_parameters_None_if_absence(
    para_file, 'multi_training_files')

input_image_dir = parameters.get_string_parameters(para_file,
                                                   'input_image_dir')

dstnodata = parameters.get_string_parameters(para_file, 'dst_nodata')
buffersize = parameters.get_string_parameters(para_file, 'buffer_size')
rectangle_ext = parameters.get_string_parameters(para_file, 'b_use_rectangle')

if os.path.isfile('sub_images_labels_list.txt'):
    io_function.delete_file_or_dir('sub_images_labels_list.txt')


def get_subImage_subLabel_one_shp(all_train_shp,
                                  buffersize,
                                  dstnodata,
Example #12
def main(options, args):
    polygons_shp = args[0]
    polygons_shp_backup = args[0]

    output = options.output
    if output is None:
        output = io_function.get_name_by_adding_tail(polygons_shp, 'removed')
    para_file = options.para_file

    assert io_function.is_file_exist(polygons_shp)

    # remove polygons based on area
    # area_thr = 1000  #10 pixels
    area_thr = parameters.get_digit_parameters_None_if_absence(
        para_file, 'minimum_area', 'int')
    b_smaller = True
    if area_thr is not None:
        rm_area_save_shp = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmArea')
        if remove_polygons(polygons_shp, 'INarea', area_thr, b_smaller,
                           rm_area_save_shp) is False:
            basic.outputlogMessage(
                "error, removing polygons based on size failed")
        else:
            polygons_shp = rm_area_save_shp
    else:
        basic.outputlogMessage(
            'warning, minimum_area is absent in the para file, skip removing polygons based on areas'
        )

    # remove  polygons based on slope information
    # slope_small_thr = 2
    slope_small_thr = parameters.get_digit_parameters_None_if_absence(
        para_file, 'minimum_slope', 'float')
    b_smaller = True
    if slope_small_thr is not None:
        rm_slope_save_shp1 = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmslope1')
        if remove_polygons(polygons_shp, 'slo_mean', slope_small_thr,
                           b_smaller, rm_slope_save_shp1) is False:
            basic.outputlogMessage(
                "error, removing polygons based on slo_mean failed")
        else:
            polygons_shp = rm_slope_save_shp1
    else:
        basic.outputlogMessage(
            'warning, minimum_slope is absent in the para file, skip removing polygons based on minimum slope'
        )

    # slope_large_thr = 20
    slope_large_thr = parameters.get_digit_parameters_None_if_absence(
        para_file, 'maximum_slope', 'float')
    b_smaller = False
    if slope_large_thr is not None:
        rm_slope_save_shp2 = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmslope2')
        if remove_polygons(polygons_shp, 'slo_mean', slope_large_thr,
                           b_smaller, rm_slope_save_shp2) is False:
            basic.outputlogMessage(
                "error, removing polygons based on slo_mean (2) failed")
        else:
            polygons_shp = rm_slope_save_shp2
    else:
        basic.outputlogMessage(
            'warning, maximum_slope is absent in the para file, skip removing polygons based on maximum slope'
        )

    # remove polygons based on dem
    # dem_small_thr = 3000
    dem_small_thr = parameters.get_digit_parameters_None_if_absence(
        para_file, 'minimum_elevation', 'int')
    b_smaller = True
    if dem_small_thr is not None:
        rm_dem_save_shp = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmDEM')
        if remove_polygons(polygons_shp, 'dem_mean', dem_small_thr, b_smaller,
                           rm_dem_save_shp) is False:
            basic.outputlogMessage(
                "error, removing polygons based on dem_mean failed")
        else:
            polygons_shp = rm_dem_save_shp
    else:
        basic.outputlogMessage(
            'warning, minimum_elevation is absent in the para file, skip removing polygons based on minimum elevation'
        )

    # remove polygons not in the extent
    outline_shp = parameters.get_string_parameters_None_if_absence(
        para_file, 'target_outline_shp')
    if outline_shp is not None:
        rm_outline_save_shp = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmOutline')
        remove_polygons_outside_extent(polygons_shp, outline_shp,
                                       rm_outline_save_shp)
        polygons_shp = rm_outline_save_shp
    else:
        basic.outputlogMessage(
            'warning, target_outline_shp is absent in the para file, skip removing polygons based on outlines'
        )

    # copy to final output
    copy_shape_file(polygons_shp, output)

    pass
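
Note: remove_polygons itself is not included in these examples. Judging from the calls above, it drops polygons whose attribute value falls below (b_smaller=True) or above (b_smaller=False) a threshold and writes the rest to a new shapefile; a rough geopandas-based sketch under that assumption (the real helper may differ in detail):

import geopandas as gpd

def remove_polygons_sketch(in_shp, attribute, threshold, b_smaller, save_shp):
    # keep polygons whose attribute value passes the threshold, save the rest to a new shapefile
    gdf = gpd.read_file(in_shp)
    keep = gdf[gdf[attribute] >= threshold] if b_smaller else gdf[gdf[attribute] <= threshold]
    if len(keep) < 1:
        return False      # nothing left to save, mimicking the False return checked above
    keep.to_file(save_shp)
    return True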
Example #13
def get_sub_images_multi_regions(para_file):

    print(
        "extract sub-images and sub-labels for a given shape file (training polygons)"
    )

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in current folder: %s' %
                      (para_file, os.getcwd()))

    get_subImage_script = os.path.join(code_dir, 'datasets',
                                       'get_subImages.py')
    SECONDS = time.time()

    # get name of training areas
    multi_training_regions = parameters.get_string_list_parameters_None_if_absence(
        para_file, 'training_regions')
    if multi_training_regions is None or len(multi_training_regions) < 1:
        raise ValueError('No training area is set in %s' % para_file)

    # multi_training_files = parameters.get_string_parameters_None_if_absence(para_file, 'multi_training_files')

    dstnodata = parameters.get_string_parameters(para_file, 'dst_nodata')
    buffersize = parameters.get_string_parameters(para_file, 'buffer_size')
    rectangle_ext = parameters.get_string_parameters(para_file,
                                                     'b_use_rectangle')
    process_num = parameters.get_digit_parameters(para_file, 'process_num',
                                                  'int')

    b_no_label_image = parameters.get_bool_parameters_None_if_absence(
        para_file, 'b_no_label_image')

    if os.path.isfile('sub_images_labels_list.txt'):
        io_function.delete_file_or_dir('sub_images_labels_list.txt')

    subImage_dir = parameters.get_string_parameters_None_if_absence(
        para_file, 'input_train_dir')
    subLabel_dir = parameters.get_string_parameters_None_if_absence(
        para_file, 'input_label_dir')

    # loop each training regions
    for idx, area_ini in enumerate(multi_training_regions):

        input_image_dir = parameters.get_directory_None_if_absence(
            area_ini, 'input_image_dir')

        # it is OK to treat a file name as a pattern and pass it to the following functions to get the file list
        input_image_or_pattern = parameters.get_string_parameters(
            area_ini, 'input_image_or_pattern')

        b_sub_images_json = parameters.get_bool_parameters(
            area_ini, 'b_sub_images_json')
        if b_sub_images_json is True:
            # copy sub-images, then convert json files to label images.
            object_names = parameters.get_string_list_parameters(
                para_file, 'object_names')
            get_subImages_json.get_subimages_label_josn(
                input_image_dir,
                input_image_or_pattern,
                subImage_dir,
                subLabel_dir,
                object_names,
                b_no_label_image=b_no_label_image,
                process_num=process_num)

            pass
        else:

            all_train_shp = parameters.get_file_path_parameters_None_if_absence(
                area_ini, 'training_polygons')
            train_shp = parameters.get_string_parameters(
                area_ini, 'training_polygons_sub')

            # get subImage and subLabel for one set of training polygons
            print(
                'extract training data from image folder (%s) and polygons (%s)'
                % (input_image_dir, train_shp))
            if b_no_label_image is True:
                get_subImage_one_shp(get_subImage_script,
                                     all_train_shp,
                                     buffersize,
                                     dstnodata,
                                     rectangle_ext,
                                     train_shp,
                                     input_image_dir,
                                     file_pattern=input_image_or_pattern,
                                     process_num=process_num)
            else:
                get_subImage_subLabel_one_shp(
                    get_subImage_script,
                    all_train_shp,
                    buffersize,
                    dstnodata,
                    rectangle_ext,
                    train_shp,
                    input_image_dir,
                    file_pattern=input_image_or_pattern,
                    process_num=process_num)

    # check for black sub-images, or sub-images that are mostly black (nodata)
    new_sub_image_label_list = []
    delete_sub_image_label_list = []
    subImage_dir_delete = subImage_dir + '_delete'
    subLabel_dir_delete = subLabel_dir + '_delete'
    io_function.mkdir(subImage_dir_delete)
    if b_no_label_image is None or b_no_label_image is False:
        io_function.mkdir(subLabel_dir_delete)
    get_valid_percent_entropy.plot_valid_entropy(subImage_dir)
    with open('sub_images_labels_list.txt', 'r') as f_obj:
        lines = f_obj.readlines()
        for line in lines:
            image_path, label_path = line.strip().split(':')
            # valid_per = raster_io.get_valid_pixel_percentage(image_path)
            valid_per, entropy = raster_io.get_valid_percent_shannon_entropy(
                image_path)  # base=10
            if valid_per > 60 and entropy >= 0.5:
                new_sub_image_label_list.append(line)
            else:
                delete_sub_image_label_list.append(line)
                io_function.movefiletodir(image_path, subImage_dir_delete)
                if os.path.isfile(label_path):
                    io_function.movefiletodir(label_path, subLabel_dir_delete)
    if len(delete_sub_image_label_list) > 0:
        with open('sub_images_labels_list.txt', 'w') as f_obj:
            for line in new_sub_image_label_list:
                f_obj.writelines(line)

    # check whether they have the same number of subImages and subLabels
    if b_no_label_image is None or b_no_label_image is False:
        sub_image_list = io_function.get_file_list_by_pattern(
            subImage_dir, '*.tif')
        sub_label_list = io_function.get_file_list_by_pattern(
            subLabel_dir, '*.tif')
        if len(sub_image_list) != len(sub_label_list):
            raise ValueError(
                'the count of subImage (%d) and subLabel (%d) is different' %
                (len(sub_image_list), len(sub_label_list)))

    # save brief information of sub-images
    height_list = []
    width_list = []
    band_count = 0
    dtype = 'unknown'
    for line in new_sub_image_label_list:
        image_path, label_path = line.strip().split(':')
        height, width, band_count, dtype = raster_io.get_height_width_bandnum_dtype(
            image_path)
        height_list.append(height)
        width_list.append(width)
    # save info to file, if it exists, it will be overwritten
    img_count = len(new_sub_image_label_list)
    with open('sub_images_patches_info.txt', 'w') as f_obj:
        f_obj.writelines('information of sub-images: \n')
        f_obj.writelines('number of sub-images : %d \n' % img_count)
        f_obj.writelines('band count : %d \n' % band_count)
        f_obj.writelines('data type : %s \n' % dtype)
        f_obj.writelines('maximum width and height: %d, %d \n' %
                         (max(width_list), max(height_list)))
        f_obj.writelines('minimum width and height: %d, %d \n' %
                         (min(width_list), min(height_list)))
        f_obj.writelines(
            'mean width and height: %.2f, %.2f \n\n' %
            (sum(width_list) / img_count, sum(height_list) / img_count))

    duration = time.time() - SECONDS
    os.system(
        'echo "$(date): time cost of getting sub images and labels: %.2f seconds">>time_cost.txt'
        % duration)
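
Note: in the filtering step above, raster_io.get_valid_percent_shannon_entropy is used to drop sub-images that are mostly nodata or nearly uniform; its implementation is not shown. A rough equivalent using rasterio and numpy, assuming band 1 and a known nodata value (the real helper may read nodata from the file and differ in detail):

import numpy as np
import rasterio

def valid_percent_shannon_entropy_sketch(image_path, nodata=0):
    with rasterio.open(image_path) as src:
        band = src.read(1)
    valid = band != nodata
    valid_per = 100.0 * np.count_nonzero(valid) / band.size
    values, counts = np.unique(band[valid], return_counts=True)
    if counts.sum() == 0:
        return 0.0, 0.0                      # fully nodata patch
    prob = counts / counts.sum()
    entropy = -np.sum(prob * np.log10(prob))  # base 10, as noted in the comment above
    return valid_per, entropy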
Example #14
def split_train_val(para_file):
    print("split data set into training and validation")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in current folder: %s' % (para_file, os.getcwd()))

    code_dir = os.path.join(os.path.dirname(sys.argv[0]), '..')
    sys.path.insert(0, code_dir)
    import parameters

    script = os.path.join(code_dir, 'datasets', 'train_test_split.py')

    training_data_per = parameters.get_digit_parameters_None_if_absence(para_file, 'training_data_per','float')
    train_sample_txt = parameters.get_string_parameters(para_file, 'training_sample_list_txt')
    val_sample_txt = parameters.get_string_parameters(para_file, 'validation_sample_list_txt')

    dir = 'list'
    all_img_list = os.path.join(dir,'trainval.txt')

    # command_string = script + ' -p ' + str(training_data_per) + \
    #                  ' -t ' + train_sample_txt  + \
    #                  ' -v ' + val_sample_txt  + \
    #                  ' --shuffle ' + all_img_list
    # res = os.system(command_string)
    # if res!=0:
    #     sys.exit(1)

    if training_data_per is None:
        # similar to the VOC dataset, we only use 1449 images for validation (because the data are also used for
        # training, it is training accuracy rather than validation accuracy)
        with open(all_img_list, 'r') as f_obj:
            file_names = f_obj.readlines()
            if len(file_names) < 1449:
                # val.txt is identical to trainval.txt
                io_function.copy_file_to_dst(all_img_list,os.path.join(dir,train_sample_txt))
                io_function.copy_file_to_dst(all_img_list,os.path.join(dir,val_sample_txt))
            else:
                io_function.copy_file_to_dst(all_img_list, os.path.join(dir, train_sample_txt))
                # randomly get 1449 images from trainval.txt
                import random
                sel_file_index = random.sample(range(len(file_names)), 1449)  # get a list of numbers without duplicates
                with open(os.path.join(dir, val_sample_txt), 'w') as w_obj:
                    sel_file_names = [file_names[item] for item in sel_file_index]
                    w_obj.writelines(sel_file_names)
    else:
        # split training and validation datasets
        Do_shuffle = True
        from datasets.train_test_split import train_test_split_main
        train_test_split_main(all_img_list,training_data_per,Do_shuffle,train_sample_txt,val_sample_txt)


    # save brief information of image patches
    img_ext = parameters.get_string_parameters_None_if_absence(para_file, 'split_image_format')

    get_image_with_height_list(os.path.join(dir,train_sample_txt), img_ext, info_type='training')

    get_image_with_height_list(os.path.join(dir,val_sample_txt), img_ext, info_type='validation')

    # save the count of each class in training and validation
    get_sample_count_of_each_class(os.path.join(dir, train_sample_txt), info_type='training')

    get_sample_count_of_each_class(os.path.join(dir, val_sample_txt), info_type='validation')
def remove_polygons_main(polygons_shp, output, para_file):

    polygons_shp_backup = polygons_shp
    assert io_function.is_file_exist(polygons_shp)

    # remove polygons based on area
    # area_thr = 1000  #10 pixels
    area_thr = parameters.get_digit_parameters_None_if_absence(
        para_file, 'minimum_area', 'int')
    b_smaller = True
    if area_thr is not None:
        rm_area_save_shp = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmArea')
        if remove_polygons(polygons_shp, 'INarea', area_thr, b_smaller,
                           rm_area_save_shp) is False:
            basic.outputlogMessage(
                "error, removing polygons based on size failed")
        else:
            polygons_shp = rm_area_save_shp
    else:
        basic.outputlogMessage(
            'warning, minimum_area is absent in the para file, skip removing polygons based on areas'
        )

    # remove  polygons based on slope information
    # slope_small_thr = 2
    slope_small_thr = parameters.get_digit_parameters_None_if_absence(
        para_file, 'minimum_slope', 'float')
    b_smaller = True
    if slope_small_thr is not None:
        rm_slope_save_shp1 = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmslope1')
        if remove_polygons(polygons_shp, 'slo_mean', slope_small_thr,
                           b_smaller, rm_slope_save_shp1) is False:
            basic.outputlogMessage(
                "error, removing polygons based on slo_mean failed")
        else:
            polygons_shp = rm_slope_save_shp1
    else:
        basic.outputlogMessage(
            'warning, minimum_slope is absent in the para file, skip removing polygons based on minimum slope'
        )

    # slope_large_thr = 20
    slope_large_thr = parameters.get_digit_parameters_None_if_absence(
        para_file, 'maximum_slope', 'float')
    b_smaller = False
    if slope_large_thr is not None:
        rm_slope_save_shp2 = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmslope2')
        if remove_polygons(polygons_shp, 'slo_mean', slope_large_thr,
                           b_smaller, rm_slope_save_shp2) is False:
            basic.outputlogMessage(
                "error, removing polygons based on slo_mean (2) failed")
        else:
            polygons_shp = rm_slope_save_shp2
    else:
        basic.outputlogMessage(
            'warning, maximum_slope is absent in the para file, skip removing polygons based on maximum slope'
        )

    # remove polygons based on dem
    # dem_small_thr = 3000
    dem_small_thr = parameters.get_digit_parameters_None_if_absence(
        para_file, 'minimum_elevation', 'int')
    b_smaller = True
    if dem_small_thr is not None:
        rm_dem_save_shp = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmDEM')
        if remove_polygons(polygons_shp, 'dem_mean', dem_small_thr, b_smaller,
                           rm_dem_save_shp) is False:
            basic.outputlogMessage(
                "error, removing polygons based on dem_mean failed")
        else:
            polygons_shp = rm_dem_save_shp
    else:
        basic.outputlogMessage(
            'warning, minimum_elevation is absent in the para file, skip removing polygons based on minimum elevation'
        )

    ratio_thr = parameters.get_digit_parameters_None_if_absence(
        para_file, 'minimum_ratio_width_height', 'float')
    b_smaller = True
    if ratio_thr is not None:
        rm_ratio_w_h_save_shp = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmRwh')
        if remove_polygons(polygons_shp, 'ratio_w_h', ratio_thr, b_smaller,
                           rm_ratio_w_h_save_shp) is False:
            basic.outputlogMessage(
                "error, removing polygons based on ratio_w_h failed")
        else:
            polygons_shp = rm_ratio_w_h_save_shp
    else:
        basic.outputlogMessage(
            'warning, minimum_ratio_width_height is absent in the para file, skip removing polygons based on minimum ratio of width and height'
        )

    # remove polygons based on elevation reduction
    minimum_dem_reduction_area_thr = parameters.get_digit_parameters_None_if_absence(
        para_file, 'minimum_dem_reduction_area', 'float')
    b_smaller = True
    if minimum_dem_reduction_area_thr is not None:
        rm_demD_save_shp = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmdemD')
        if remove_polygons(polygons_shp, 'demD_area',
                           minimum_dem_reduction_area_thr, b_smaller,
                           rm_demD_save_shp) is False:
            basic.outputlogMessage(
                "error, removing polygons based on demD_area failed")
        else:
            polygons_shp = rm_demD_save_shp
    else:
        basic.outputlogMessage(
            'warning, minimum_dem_reduction_area is absent in the para file, skip removing polygons based on minimum_dem_reduction_area'
        )

    # remove polygons based on occurrence
    min_ocurr = parameters.get_digit_parameters_None_if_absence(
        para_file, 'threshold_occurrence_multi_observation', 'int')
    b_smaller = True
    if min_ocurr is not None:
        rm_occur_save_shp = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'RmOccur')
        if remove_polygons(polygons_shp, 'time_occur', min_ocurr, b_smaller,
                           rm_occur_save_shp) is False:
            basic.outputlogMessage(
                "error, removing polygons based on time_occur failed")
        else:
            polygons_shp = rm_occur_save_shp
    else:
        basic.outputlogMessage(
            'warning, threshold_occurrence_multi_observation is absent in the para file, '
            'skip removing polygons based on it')

    # remove polygons not in the extent
    outline_shp = parameters.get_string_parameters_None_if_absence(
        para_file, 'target_outline_shp')
    if outline_shp is not None:
        rm_outline_save_shp = io_function.get_name_by_adding_tail(
            polygons_shp_backup, 'rmOutline')
        remove_polygons_outside_extent(polygons_shp, outline_shp,
                                       rm_outline_save_shp)
        polygons_shp = rm_outline_save_shp
    else:
        basic.outputlogMessage(
            'warning, target_outline_shp is absent in the para file, skip removing polygons based on outlines'
        )

    # copy to final output
    copy_shape_file(polygons_shp, output)

    pass
Example #16
def postProcess(para_file,inf_post_note, b_skip_getshp=False,test_id=None):
    # test_id is related to the training

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in current folder: %s' % (para_file, os.getcwd()))

    # the test string in 'exe.sh'
    test_note = inf_post_note

    WORK_DIR = os.getcwd()

    SECONDS = time.time()

    expr_name = parameters.get_string_parameters(para_file, 'expr_name')
    network_setting_ini = parameters.get_string_parameters(para_file,'network_setting_ini')
    gan_setting_ini = parameters.get_string_parameters_None_if_absence (para_file,'regions_n_setting_image_translation_ini')


    inf_dir = parameters.get_directory(para_file, 'inf_output_dir')
    if test_id is None:
        test_id = os.path.basename(WORK_DIR) + '_' + expr_name

    # get name of inference areas
    multi_inf_regions = parameters.get_string_list_parameters(para_file, 'inference_regions')

    # run post-processing in parallel
    # max_parallel_postProc_task = 8

    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    io_function.mkdir(backup_dir)

    # loop each inference regions
    sub_tasks = []
    same_area_time_inis =  group_same_area_time_observations(multi_inf_regions)
    region_eva_reports = {}
    for key in same_area_time_inis.keys():
        multi_observations = same_area_time_inis[key]
        area_name = parameters.get_string_parameters(multi_observations[0], 'area_name')  # they have the same name and time
        area_time = parameters.get_string_parameters(multi_observations[0], 'area_time')
        merged_shp_list = []
        map_raster_list_2d = [None] * len(multi_observations)
        for area_idx, area_ini in enumerate(multi_observations):
            area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
            area_save_dir, shp_pre,_ = get_observation_save_dir_shp_pre(inf_dir,area_name,area_time,area_remark,test_id)

            # get image list
            inf_image_dir = parameters.get_directory(area_ini, 'inf_image_dir')
            # it is OK to treat a file name as a pattern and pass it to the following functions to get the file list
            inf_image_or_pattern = parameters.get_string_parameters(area_ini, 'inf_image_or_pattern')
            inf_img_list = io_function.get_file_list_by_pattern(inf_image_dir,inf_image_or_pattern)
            img_count = len(inf_img_list)
            if img_count < 1:
                raise ValueError('No image for inference, please check inf_image_dir and inf_image_or_pattern in %s'%area_ini)

            merged_shp = os.path.join(WORK_DIR, area_save_dir, shp_pre + '.shp')
            if b_skip_getshp:
                pass
            else:
                # post-process images one by one
                result_shp_list = []
                map_raster_list = []
                for img_idx, img_path in enumerate(inf_img_list):
                    out_shp, out_raster = inf_results_to_shapefile(WORK_DIR, img_idx, area_save_dir, test_id)
                    if out_shp is None or out_raster is None:
                        continue
                    result_shp_list.append(os.path.join(WORK_DIR,out_shp))
                    map_raster_list.append(out_raster)
                # merge shapefiles
                if merge_shape_files(result_shp_list,merged_shp) is False:
                    continue
                map_raster_list_2d[area_idx] = map_raster_list

            merged_shp_list.append(merged_shp)

        if b_skip_getshp is False:
            # add occurrence to each polygon
            get_occurence_for_multi_observation(merged_shp_list)

        for area_idx, area_ini in enumerate(multi_observations):
            area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
            area_save_dir, shp_pre, area_remark_time  = get_observation_save_dir_shp_pre(inf_dir, area_name, area_time, area_remark,test_id)

            merged_shp = os.path.join(WORK_DIR, area_save_dir, shp_pre + '.shp')
            if os.path.isfile(merged_shp) is False:
                print('Warning, %s does not exist, skip'%merged_shp)
                continue

            # add attributes to shapefile
            # add_attributes_script = os.path.join(code_dir,'datasets', 'get_polygon_attributes.py')
            shp_attributes = os.path.join(WORK_DIR,area_save_dir, shp_pre+'_post_NOrm.shp')
            # add_polygon_attributes(add_attributes_script,merged_shp, shp_attributes, para_file, area_ini )
            add_polygon_attributes(merged_shp, shp_attributes, para_file, area_ini)

            # remove polygons
            # rm_polygon_script = os.path.join(code_dir,'datasets', 'remove_mappedPolygons.py')
            shp_post = os.path.join(WORK_DIR, area_save_dir, shp_pre+'_post.shp')
            # remove_polygons(rm_polygon_script,shp_attributes, shp_post, para_file)
            remove_polygons_main(shp_attributes, shp_post, para_file)

            # evaluate the mapping results
            # eval_shp_script = os.path.join(code_dir,'datasets', 'evaluation_result.py')
            out_report = os.path.join(WORK_DIR, area_save_dir, shp_pre+'_evaluation_report.txt')
            # evaluation_polygons(eval_shp_script, shp_post, para_file, area_ini,out_report)
            evaluation_polygons(shp_post,para_file,area_ini,out_report)


            ##### copy and backup files ######
            # copy files to result_backup
            if len(test_note) > 0:
                backup_dir_area = os.path.join(backup_dir, area_name + '_' + area_remark_time + '_' + test_id + '_' + test_note)
            else:
                backup_dir_area = os.path.join(backup_dir, area_name + '_' + area_remark_time + '_' + test_id )
            io_function.mkdir(backup_dir_area)
            if len(test_note) > 0:
                bak_merged_shp = os.path.join(backup_dir_area, '_'.join([shp_pre,test_note]) + '.shp')
                bak_post_shp = os.path.join(backup_dir_area, '_'.join([shp_pre,'post',test_note]) + '.shp')
                bak_eva_report = os.path.join(backup_dir_area, '_'.join([shp_pre,'eva_report',test_note]) + '.txt')
                bak_area_ini = os.path.join(backup_dir_area, '_'.join([shp_pre,'region',test_note]) + '.ini')
            else:
                bak_merged_shp = os.path.join(backup_dir_area, '_'.join([shp_pre]) + '.shp')
                bak_post_shp = os.path.join(backup_dir_area, '_'.join([shp_pre, 'post']) + '.shp')
                bak_eva_report = os.path.join(backup_dir_area, '_'.join([shp_pre, 'eva_report']) + '.txt')
                bak_area_ini = os.path.join(backup_dir_area, '_'.join([shp_pre, 'region']) + '.ini')

            io_function.copy_shape_file(merged_shp,bak_merged_shp)
            io_function.copy_shape_file(shp_post, bak_post_shp)
            if os.path.isfile(out_report):
                io_function.copy_file_to_dst(out_report, bak_eva_report, overwrite=True)
            io_function.copy_file_to_dst(area_ini, bak_area_ini, overwrite=True)

            # copy map raster
            b_backup_map_raster = parameters.get_bool_parameters_None_if_absence(area_ini,'b_backup_map_raster')
            if b_backup_map_raster is True:
                if map_raster_list_2d[area_idx] is not None:
                    for map_tif in map_raster_list_2d[area_idx]:
                        bak_map_tif = os.path.join(backup_dir_area,os.path.basename(map_tif))
                        io_function.copy_file_to_dst(map_tif,bak_map_tif,overwrite=True)

            region_eva_reports[shp_pre] = bak_eva_report



    if len(test_note) > 0:
        bak_para_ini = os.path.join(backup_dir, '_'.join([test_id,'para',test_note]) + '.ini' )
        bak_network_ini = os.path.join(backup_dir, '_'.join([test_id,'network',test_note]) + '.ini' )
        bak_gan_ini = os.path.join(backup_dir, '_'.join([test_id,'gan',test_note]) + '.ini' )
        bak_time_cost = os.path.join(backup_dir, '_'.join([test_id,'time_cost',test_note]) + '.txt' )
    else:
        bak_para_ini = os.path.join(backup_dir, '_'.join([test_id, 'para']) + '.ini')
        bak_network_ini = os.path.join(backup_dir, '_'.join([test_id, 'network']) + '.ini')
        bak_gan_ini = os.path.join(backup_dir, '_'.join([test_id, 'gan']) + '.ini')
        bak_time_cost = os.path.join(backup_dir, '_'.join([test_id, 'time_cost']) + '.txt')
    io_function.copy_file_to_dst(para_file, bak_para_ini,overwrite=True)
    io_function.copy_file_to_dst(network_setting_ini, bak_network_ini,overwrite=True)
    if gan_setting_ini is not None:
        io_function.copy_file_to_dst(gan_setting_ini, bak_gan_ini,overwrite=True)
    if os.path.isfile('time_cost.txt'):
        io_function.copy_file_to_dst('time_cost.txt', bak_time_cost,overwrite=True)

    # output the evaluation report to screen
    for key in region_eva_reports.keys():
        report = region_eva_reports[key]
        if os.path.isfile(report) is False:
            continue
        print('evaluation report for %s:'%key)
        os.system('head -n 7 %s'%report)

    # output evaluation report to table
    if len(test_note) > 0:
        out_table = os.path.join(backup_dir, '_'.join([test_id,'accuracy_table',test_note]) + '.xlsx' )
    else:
        out_table = os.path.join(backup_dir, '_'.join([test_id, 'accuracy_table']) + '.xlsx')
    eva_reports = [ region_eva_reports[key] for key in region_eva_reports if os.path.isfile(region_eva_reports[key])]
    eva_report_to_tables.eva_reports_to_table(eva_reports, out_table)

    duration= time.time() - SECONDS
    os.system('echo "$(date): time cost of post-procesing: %.2f seconds">>time_cost.txt'%duration)
Example #17
def train_evaluation_deeplab(WORK_DIR, deeplab_dir, expr_name, para_file,
                             network_setting_ini, gpu_num):

    # prepare training folder
    EXP_FOLDER = expr_name
    INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models')
    TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train')
    EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval')
    VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis')
    EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export')

    io_function.mkdir(INIT_FOLDER)
    io_function.mkdir(TRAIN_LOGDIR)
    io_function.mkdir(EVAL_LOGDIR)
    io_function.mkdir(VIS_LOGDIR)
    io_function.mkdir(EXPORT_DIR)

    # prepare the tensorflow checkpoint (pre-trained model) for training
    pre_trained_dir = parameters.get_directory_None_if_absence(
        network_setting_ini, 'pre_trained_model_folder')
    pre_trained_tar = parameters.get_string_parameters(network_setting_ini,
                                                       'TF_INIT_CKPT')
    pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar)
    if os.path.isfile(pre_trained_path) is False:
        print('pre-trained model: %s does not exist, trying to download' %
              pre_trained_path)
        # try to download the file
        pre_trained_url = parameters.get_string_parameters_None_if_absence(
            network_setting_ini, 'pre_trained_model_url')
        res = os.system('wget %s ' % pre_trained_url)
        if res != 0:
            sys.exit(1)
        io_function.movefiletodir(pre_trained_tar, pre_trained_dir)

    # unpack pre-trained model to INIT_FOLDER
    os.chdir(INIT_FOLDER)
    res = os.system('tar -xf %s' % pre_trained_path)
    if res != 0:
        raise IOError('failed to unpack %s' % pre_trained_path)
    os.chdir(WORK_DIR)

    dataset_dir = os.path.join(WORK_DIR, 'tfrecord')
    batch_size = parameters.get_digit_parameters(network_setting_ini,
                                                 'batch_size', 'int')
    # maximum iteration number
    iteration_num = parameters.get_digit_parameters(network_setting_ini,
                                                    'iteration_num', 'int')
    base_learning_rate = parameters.get_digit_parameters(
        network_setting_ini, 'base_learning_rate', 'float')

    train_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_output_stride', 'int')
    train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates1', 'int')
    train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates2', 'int')
    train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates3', 'int')

    inf_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_output_stride', 'int')
    inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates1', 'int')
    inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates2', 'int')
    inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates3', 'int')

    # depth_multiplier default is 1.0.
    depth_multiplier = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'depth_multiplier', 'float')

    decoder_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'decoder_output_stride', 'int')
    aspp_convs_filters = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'aspp_convs_filters', 'int')

    train_script = os.path.join(deeplab_dir, 'train.py')
    train_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'training_sample_list_txt'))[0]
    model_variant = parameters.get_string_parameters(network_setting_ini,
                                                     'model_variant')
    checkpoint = parameters.get_string_parameters(network_setting_ini,
                                                  'tf_initial_checkpoint')
    init_checkpoint_files = io_function.get_file_list_by_pattern(
        INIT_FOLDER, checkpoint + '*')
    if len(init_checkpoint_files) < 1:
        raise IOError('No initial checkpoint in %s with pattern: %s' %
                      (INIT_FOLDER, checkpoint))
    init_checkpoint = os.path.join(INIT_FOLDER, checkpoint)
    b_early_stopping = parameters.get_bool_parameters(para_file,
                                                      'b_early_stopping')
    b_initialize_last_layer = parameters.get_bool_parameters(
        para_file, 'b_initialize_last_layer')

    dataset = parameters.get_string_parameters(para_file, 'dataset_name')
    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(
        para_file, 'NUM_CLASSES_noBG', 'int')
    assert num_classes_noBG is not None
    if b_initialize_last_layer is True:
        if pre_trained_tar in pre_trained_tar_21_classes:
            print(
                'warning, pretrained model %s is trained with 21 classes, set num_of_classes to 21'
                % pre_trained_tar)
            num_classes_noBG = 20
        if pre_trained_tar in pre_trained_tar_19_classes:
            print(
                'warning, pretrained model %s is trained with 19 classes, set num_of_classes to 19'
                % pre_trained_tar)
            num_classes_noBG = 18
    num_of_classes = num_classes_noBG + 1

    image_crop_size = parameters.get_string_list_parameters(
        para_file, 'image_crop_size')
    # raise an error if there are not exactly two values or they are not digits
    if len(image_crop_size) != 2 or not (image_crop_size[0].isdigit()
                                         and image_crop_size[1].isdigit()):
        raise ValueError('image_crop_size should be height,width')
    crop_size_str = ','.join(image_crop_size)

    evl_script = os.path.join(deeplab_dir, 'eval.py')
    evl_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'validation_sample_list_txt'))[0]
    max_eva_number = 1

    # validation interval (epoch)
    validation_interval = parameters.get_digit_parameters_None_if_absence(
        para_file, 'validation_interval', 'int')
    train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file)
    iter_per_epoch = math.ceil(train_count / batch_size)
    total_epoches = math.ceil(iteration_num / iter_per_epoch)
    already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
    if already_trained_iteration >= iteration_num:
        basic.outputlogMessage('Training already run %d iterations, skip' %
                               already_trained_iteration)
        return True
    if validation_interval is None:
        basic.outputlogMessage(
            'No input validation_interval, so training to %d, then evaluating in the end'
            % iteration_num)
        # run training
        train_deeplab(train_script, dataset, train_split, num_of_classes,
                      base_learning_rate, model_variant, init_checkpoint,
                      TRAIN_LOGDIR, dataset_dir, gpu_num, train_atrous_rates1,
                      train_atrous_rates2, train_atrous_rates3,
                      train_output_stride, crop_size_str, batch_size,
                      iteration_num, depth_multiplier, decoder_output_stride,
                      aspp_convs_filters, b_initialize_last_layer)

        # run evaluation
        evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes,
                           model_variant, inf_atrous_rates1, inf_atrous_rates2,
                           inf_atrous_rates3, inf_output_stride, TRAIN_LOGDIR,
                           EVAL_LOGDIR, dataset_dir, crop_size_str,
                           max_eva_number, depth_multiplier,
                           decoder_output_stride, aspp_convs_filters)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        get_loss_learning_rate_list(TRAIN_LOGDIR)
    else:
        basic.outputlogMessage(
            'training to the maximum iteration of %d, and evaluating every %d epoch(es)'
            % (iteration_num, validation_interval))
        for epoch in range(validation_interval,
                           total_epoches + validation_interval,
                           validation_interval):

            to_iter_num = min(epoch * iter_per_epoch, iteration_num)
            if to_iter_num <= already_trained_iteration:
                continue
            basic.outputlogMessage(
                'training and evaluating to %d epoches (to iteration: %d)' %
                (epoch, to_iter_num))

            # run training
            train_deeplab(train_script, dataset, train_split, num_of_classes,
                          base_learning_rate, model_variant, init_checkpoint,
                          TRAIN_LOGDIR, dataset_dir, gpu_num,
                          train_atrous_rates1, train_atrous_rates2,
                          train_atrous_rates3, train_output_stride,
                          crop_size_str, batch_size, to_iter_num,
                          depth_multiplier, decoder_output_stride,
                          aspp_convs_filters, b_initialize_last_layer)

            # run evaluation
            evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes,
                               model_variant, inf_atrous_rates1,
                               inf_atrous_rates2, inf_atrous_rates3,
                               inf_output_stride, TRAIN_LOGDIR, EVAL_LOGDIR,
                               dataset_dir, crop_size_str, max_eva_number,
                               depth_multiplier, decoder_output_stride,
                               aspp_convs_filters)

            # get miou
            miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
            # save loss value to disk
            get_loss_learning_rate_list(TRAIN_LOGDIR)
            # check if need to early stopping
            if b_early_stopping:
                if len(miou_dict['overall']) >= 5:
                    # if the last five mIoU values did not improve, then stop training
                    if np.all(np.diff(miou_dict['overall'][-5:]) <
                              0.005):  # threshold of 0.005, i.e. 0.5% mIoU
                        basic.outputlogMessage(
                            'early stopping: stop training because the overall mIoU did not improve in the last five evaluations'
                        )
                        output_early_stopping_message(TRAIN_LOGDIR)
                        break

    # plot mIoU, loss, and learning rate curves
    iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt')
    loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt')
    miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        iou_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        loss_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)

    # backup miou and training_loss & learning rate
    test_id = os.path.basename(WORK_DIR) + '_' + expr_name
    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    if os.path.isdir(backup_dir) is False:
        io_function.mkdir(backup_dir)

    new_iou_name = os.path.join(backup_dir,
                                test_id + '_' + os.path.basename(iou_path))
    io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True)
    miou_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(miou_curve_path))
    io_function.copy_file_to_dst(miou_curve_path,
                                 miou_curve_bakname,
                                 overwrite=True)

    loss_new_name = os.path.join(backup_dir,
                                 test_id + '_' + os.path.basename(loss_path))
    io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True)
    loss_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(loss_curve_path))
    io_function.copy_file_to_dst(loss_curve_path,
                                 loss_curve_bakname,
                                 overwrite=True)
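
As a side note, here is a minimal, self-contained sketch of the epoch/iteration arithmetic and the early-stopping test used above (all counts and mIoU values below are made up for illustration; the real values come from the parameter files):

import math

import numpy as np


def should_stop_early(miou_list, min_improve=0.005, window=5):
    # stop when none of the improvements within the last `window`
    # evaluations exceeds `min_improve` (0.005, i.e. 0.5% mIoU)
    if len(miou_list) < window:
        return False
    return bool(np.all(np.diff(miou_list[-window:]) < min_improve))


# hypothetical numbers, only for illustration
train_count, batch_size, iteration_num = 2000, 8, 30000
iter_per_epoch = math.ceil(train_count / batch_size)       # 250 iterations per epoch
total_epoches = math.ceil(iteration_num / iter_per_epoch)  # 120 epochs in total

print(iter_per_epoch, total_epoches)
print(should_stop_early([0.62, 0.71, 0.712, 0.713, 0.714, 0.714]))  # True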
Exemple #18
0
def image_label_to_yolo_format(para_file):

    print("Image labels (semantic segmentation) to YOLO object detection")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' %
                      (para_file, os.getcwd()))

    img_ext = parameters.get_string_parameters_None_if_absence(
        para_file, 'split_image_format')
    proc_num = parameters.get_digit_parameters(para_file, 'process_num', 'int')

    SECONDS = time.time()

    # get image and label path
    image_list = []
    label_list = []
    with open(os.path.join('list', 'trainval.txt'), 'r') as f_obj:
        lines = [item.strip() for item in f_obj.readlines()]
        for line in lines:
            image_list.append(os.path.join('split_images', line + img_ext))
            label_list.append(os.path.join('split_labels', line + img_ext))

    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(
        para_file, 'NUM_CLASSES_noBG', 'int')
    b_ignore_edge_objects = parameters.get_bool_parameters_None_if_absence(
        para_file, 'b_ignore_edge_objects')
    if b_ignore_edge_objects is None:
        b_ignore_edge_objects = False

    # get boxes
    total_count = len(image_list)
    for idx, (img, label) in enumerate(zip(image_list, label_list)):
        get_yolo_boxes_one_img(idx,
                               total_count,
                               img,
                               label,
                               num_classes_noBG,
                               rm_edge_obj=b_ignore_edge_objects)

    # write obj.data file
    train_sample_txt = parameters.get_string_parameters(
        para_file, 'training_sample_list_txt')
    val_sample_txt = parameters.get_string_parameters(
        para_file, 'validation_sample_list_txt')
    train_img_list = get_image_list('list', train_sample_txt, 'split_images',
                                    img_ext)
    val_img_list = get_image_list('list', val_sample_txt, 'split_images',
                                  img_ext)

    expr_name = parameters.get_string_parameters(para_file, 'expr_name')
    object_names = parameters.get_string_list_parameters(
        para_file, 'object_names')
    io_function.mkdir('data')
    io_function.mkdir(expr_name)

    with open(os.path.join('data', 'obj.data'), 'w') as f_obj:
        f_obj.write('classes = %d' % num_classes_noBG + '\n')

        train_txt = os.path.join('data', 'train.txt')
        io_function.save_list_to_txt(train_txt, train_img_list)
        f_obj.write('train = %s' % train_txt + '\n')

        val_txt = os.path.join('data', 'val.txt')
        io_function.save_list_to_txt(val_txt, val_img_list)
        f_obj.write('valid = %s' % val_txt + '\n')

        obj_name_txt = os.path.join('data', 'obj.names')
        io_function.save_list_to_txt(obj_name_txt, object_names)
        f_obj.write('names = %s' % obj_name_txt + '\n')

        f_obj.write('backup = %s' % expr_name + '\n')

    duration = time.time() - SECONDS
    os.system(
        'echo "$(date): time cost of converting to yolo format: %.2f seconds">>time_cost.txt'
        % duration)

    pass
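
For reference, the data/obj.data file written above follows the darknet/YOLO convention of simple "key = value" lines (classes, train, valid, names, backup). A small sketch, using hypothetical values, of what the file contains and how it could be parsed back:

# hypothetical content of data/obj.data (actual values depend on the parameter file):
#   classes = 1
#   train = data/train.txt
#   valid = data/val.txt
#   names = data/obj.names
#   backup = exp1


def read_obj_data(path='data/obj.data'):
    # parse the simple "key = value" lines of a darknet data file into a dict
    cfg = {}
    with open(path, 'r') as f_obj:
        for line in f_obj:
            if '=' in line:
                key, value = line.split('=', 1)
                cfg[key.strip()] = value.strip()
    return cfg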
Exemple #19
0
def train_evaluation_deeplab_separate(WORK_DIR, deeplab_dir, expr_name,
                                      para_file, network_setting_ini, gpu_num):
    '''
    in "train_evaluation_deeplab", training runs, stops for evaluation, then resumes training, which makes the learning rate behave strangely and the results worse.
    so in this function, we start two processes: one for training, another for evaluation (run on CPU)
    '''
    # prepare training folder
    EXP_FOLDER = expr_name
    INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models')
    TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train')
    EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval')
    VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis')
    EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export')

    io_function.mkdir(INIT_FOLDER)
    io_function.mkdir(TRAIN_LOGDIR)
    io_function.mkdir(EVAL_LOGDIR)
    io_function.mkdir(VIS_LOGDIR)
    io_function.mkdir(EXPORT_DIR)

    # prepare the tensorflow check point (pretrained model) for training
    pre_trained_dir = parameters.get_directory_None_if_absence(
        network_setting_ini, 'pre_trained_model_folder')
    pre_trained_tar = parameters.get_string_parameters(network_setting_ini,
                                                       'TF_INIT_CKPT')
    pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar)
    if os.path.isfile(pre_trained_path) is False:
        print('pre-trained model: %s does not exist, trying to download' %
              pre_trained_path)
        # try to download the file
        pre_trained_url = parameters.get_string_parameters_None_if_absence(
            network_setting_ini, 'pre_trained_model_url')
        res = os.system('wget %s ' % pre_trained_url)
        if res != 0:
            sys.exit(1)
        io_function.movefiletodir(pre_trained_tar, pre_trained_dir)

    # unpack pre-trained model to INIT_FOLDER
    os.chdir(INIT_FOLDER)
    res = os.system('tar -xf %s' % pre_trained_path)
    if res != 0:
        raise IOError('failed to unpack %s' % pre_trained_path)
    os.chdir(WORK_DIR)

    dataset_dir = os.path.join(WORK_DIR, 'tfrecord')
    batch_size = parameters.get_digit_parameters(network_setting_ini,
                                                 'batch_size', 'int')
    # maximum iteration number
    iteration_num = parameters.get_digit_parameters(network_setting_ini,
                                                    'iteration_num', 'int')
    base_learning_rate = parameters.get_digit_parameters(
        network_setting_ini, 'base_learning_rate', 'float')

    train_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_output_stride', 'int')
    train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates1', 'int')
    train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates2', 'int')
    train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates3', 'int')

    inf_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_output_stride', 'int')
    inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates1', 'int')
    inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates2', 'int')
    inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates3', 'int')

    # depth_multiplier default is 1.0.
    depth_multiplier = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'depth_multiplier', 'float')

    decoder_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'decoder_output_stride', 'int')
    aspp_convs_filters = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'aspp_convs_filters', 'int')

    train_script = os.path.join(deeplab_dir, 'train.py')
    train_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'training_sample_list_txt'))[0]
    model_variant = parameters.get_string_parameters(network_setting_ini,
                                                     'model_variant')
    checkpoint = parameters.get_string_parameters(network_setting_ini,
                                                  'tf_initial_checkpoint')
    init_checkpoint_files = io_function.get_file_list_by_pattern(
        INIT_FOLDER, checkpoint + '*')
    if len(init_checkpoint_files) < 1:
        raise IOError('No initial checkpoint in %s with pattern: %s' %
                      (INIT_FOLDER, checkpoint))
    init_checkpoint = os.path.join(INIT_FOLDER, checkpoint)
    b_early_stopping = parameters.get_bool_parameters(para_file,
                                                      'b_early_stopping')
    b_initialize_last_layer = parameters.get_bool_parameters(
        para_file, 'b_initialize_last_layer')

    dataset = parameters.get_string_parameters(para_file, 'dataset_name')
    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(
        para_file, 'NUM_CLASSES_noBG', 'int')
    assert num_classes_noBG is not None
    if b_initialize_last_layer is True:
        if pre_trained_tar in pre_trained_tar_21_classes:
            print(
                'warning, pretrained model %s is trained with 21 classes, set num_of_classes to 21'
                % pre_trained_tar)
            num_classes_noBG = 20
        if pre_trained_tar in pre_trained_tar_19_classes:
            print(
                'warning, pretrained model %s is trained with 19 classes, set num_of_classes to 19'
                % pre_trained_tar)
            num_classes_noBG = 18
    num_of_classes = num_classes_noBG + 1

    image_crop_size = parameters.get_string_list_parameters(
        para_file, 'image_crop_size')
    if len(image_crop_size) != 2 or image_crop_size[0].isdigit(
    ) is False or image_crop_size[1].isdigit() is False:
        raise ValueError('image_crop_size should be height,width')
    crop_size_str = ','.join(image_crop_size)

    # validation interval (in epochs); not read in this separate-process version
    # validation_interval = parameters.get_digit_parameters_None_if_absence(para_file,'validation_interval','int')

    train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file)
    iter_per_epoch = math.ceil(train_count / batch_size)
    total_epoches = math.ceil(iteration_num / iter_per_epoch)
    already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
    if already_trained_iteration >= iteration_num:
        basic.outputlogMessage('Training has already run %d iterations, skip' %
                               already_trained_iteration)
        return True

    save_interval_secs = 1200  # default is 1200 second for saving model
    save_summaries_secs = 600  # default is 600 second for saving summaries
    eval_interval_secs = save_interval_secs  # eval default is 300 seconds; if no new model has been saved, there is no need to run evaluation

    train_process = Process(
        target=train_deeplab,
        args=(train_script, dataset, train_split, num_of_classes,
              base_learning_rate, model_variant, init_checkpoint, TRAIN_LOGDIR,
              dataset_dir, gpu_num, train_atrous_rates1, train_atrous_rates2,
              train_atrous_rates3, train_output_stride, crop_size_str,
              batch_size, iteration_num, depth_multiplier,
              decoder_output_stride, aspp_convs_filters,
              b_initialize_last_layer))
    train_process.start()
    time.sleep(60)  # wait
    if train_process.exitcode is not None and train_process.exitcode != 0:
        sys.exit(1)

    # eval_process.start()
    # time.sleep(10)  # wait
    # if eval_process.exitcode is not None and eval_process.exitcode != 0:
    #     sys.exit(1)

    while True:

        # only run evaluation when there is new trained model
        already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        basic.outputlogMessage(
            'Already trained to iteration %d, latest evaluation at step %d' %
            (already_trained_iteration, miou_dict['step'][-1]))
        if already_trained_iteration > miou_dict['step'][-1]:

            # run evaluation and wait until it finished
            gpuid = ""  # set gpuid to empty string, making evaluation run on CPU
            evl_script = os.path.join(deeplab_dir, 'eval.py')
            evl_split = os.path.splitext(
                parameters.get_string_parameters(
                    para_file, 'validation_sample_list_txt'))[0]
            # max_eva_number = -1  # run as many evaluation as possible, --eval_interval_secs (default is 300 seconds)
            max_eva_number = 1  # only run once inside the while loop, use while loop to control multiple evaluation
            eval_process = Process(
                target=evaluation_deeplab,
                args=(evl_script, dataset, evl_split, num_of_classes,
                      model_variant, inf_atrous_rates1, inf_atrous_rates2,
                      inf_atrous_rates3, inf_output_stride, TRAIN_LOGDIR,
                      EVAL_LOGDIR, dataset_dir, crop_size_str, max_eva_number,
                      depth_multiplier, decoder_output_stride,
                      aspp_convs_filters, gpuid, eval_interval_secs))
            eval_process.start(
            )  # put Process inside while loop to avoid error: AssertionError: cannot start a process twice
            while eval_process.is_alive():
                time.sleep(5)

        # check if need early stopping
        if b_early_stopping:
            print(datetime.now(), 'check early stopping')
            miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
            if 'overall' in miou_dict.keys() and len(
                    miou_dict['overall']) >= 5:
                # if the last five mIoU values did not improve, then stop training
                if np.all(np.diff(miou_dict['overall'][-5:]) <
                          0.005):  # threshold of 0.005, i.e. 0.5% mIoU
                    basic.outputlogMessage(
                        'early stopping: stop training because the overall mIoU did not improve in the last five evaluations'
                    )
                    output_early_stopping_message(TRAIN_LOGDIR)

                    # train_process.kill()    # this one seems not working
                    # subprocess pid different from ps output
                    # https://stackoverflow.com/questions/4444141/subprocess-pid-different-from-ps-output
                    # os.system('kill ' + str(train_process.pid)) # still not working.  train_process.pid is not the one output by ps -aux

                    # train_process.terminate()   # Note that descendant processes of the process will not be terminated
                    # train_process.join()        # Wait until child process terminates

                    with open('train_py_pid.txt', 'r') as f_obj:
                        lines = f_obj.readlines()
                        train_pid = int(lines[0].strip())
                        os.system('kill ' + str(train_pid))
                        basic.outputlogMessage(
                            'killed the training process with pid: %d' % train_pid)

                    break  # this breaks the while loop, so some newly saved models may not be evaluated

        # if the latest evaluation step is behind the saved model iteration, run another evaluation immediately
        already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        if already_trained_iteration > miou_dict['step'][-1]:
            continue

        # if finished training
        if train_process.is_alive() is False:
            break
        # # if eval_process exit, then quit training as well
        # if eval_process.is_alive() is False and train_process.is_alive():
        #     train_process.kill()
        #     break
        time.sleep(eval_interval_secs)  # wait for next evaluation

    # save loss value to disk
    get_loss_learning_rate_list(TRAIN_LOGDIR)
    # get miou again
    miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)

    # eval_process did not exit as expected, kill it again.
    # os.system('kill ' + str(eval_process.pid))

    # get iou and backup
    iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt')
    loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt')
    patch_info = os.path.join(WORK_DIR, 'sub_images_patches_info.txt')

    # backup miou and training_loss & learning rate
    test_id = os.path.basename(WORK_DIR) + '_' + expr_name
    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    if os.path.isdir(backup_dir) is False:
        io_function.mkdir(backup_dir)
    new_iou_name = os.path.join(backup_dir,
                                test_id + '_' + os.path.basename(iou_path))
    io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True)

    loss_new_name = os.path.join(backup_dir,
                                 test_id + '_' + os.path.basename(loss_path))
    io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True)

    new_patch_info = os.path.join(backup_dir,
                                  test_id + '_' + os.path.basename(patch_info))
    io_function.copy_file_to_dst(patch_info, new_patch_info, overwrite=True)

    # plot mIoU, loss, and learning rate curves, and back them up
    miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        iou_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        loss_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    miou_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(miou_curve_path))
    io_function.copy_file_to_dst(miou_curve_path,
                                 miou_curve_bakname,
                                 overwrite=True)
    loss_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(loss_curve_path))
    io_function.copy_file_to_dst(loss_curve_path,
                                 loss_curve_bakname,
                                 overwrite=True)
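
The structure of train_evaluation_deeplab_separate above can be reduced to the following minimal sketch (with dummy workers standing in for train_deeplab and evaluation_deeplab): training runs in one Process while the parent loop repeatedly launches a short-lived evaluation Process, because a Process object cannot be started twice:

import time
from multiprocessing import Process


def dummy_train():       # stands in for train_deeplab (runs on GPU)
    time.sleep(30)


def dummy_evaluate():    # stands in for evaluation_deeplab with max_eva_number=1 (runs on CPU)
    time.sleep(5)


if __name__ == '__main__':
    train_proc = Process(target=dummy_train)
    train_proc.start()
    while True:
        # create a fresh Process for each evaluation round
        eval_proc = Process(target=dummy_evaluate)
        eval_proc.start()
        while eval_proc.is_alive():
            time.sleep(1)
        if train_proc.is_alive() is False:
            break
        time.sleep(10)   # stands in for eval_interval_secs
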
def image_translate_train_generate_main(para_file, gpu_num):
    '''
    apply a GAN to translate images from the source domain to the target domain

    existing sub-images (with sub-labels) are images in the source domain;
    images needed for inference but without training data can each be considered as being in the target domain

    '''
    print(datetime.now(), "image translation (train and generate) using GAN")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' %
                      (para_file, os.getcwd()))

    gan_para_file = parameters.get_string_parameters_None_if_absence(
        para_file, 'regions_n_setting_image_translation_ini')
    if gan_para_file is None:
        print(
            'regions_n_setting_image_translation_ini is not set, skip image translation using GAN'
        )
        return None
    gan_para_file = os.path.abspath(
        gan_para_file
    )  # change to absolute path, because later, we change folder
    training_regions = parameters.get_string_list_parameters(
        para_file, 'training_regions')

    machine_name = os.uname()[1]
    SECONDS = time.time()

    # get regions (equal to or subset of inference regions) need apply image translation
    multi_gan_regions = parameters.get_string_list_parameters(
        gan_para_file, 'regions_need_image_translation')
    multi_gan_source_regions = parameters.get_string_list_parameters(
        gan_para_file, 'source_domain_regions')
    # check target domain
    if len(multi_gan_source_regions) != len(multi_gan_regions):
        raise ValueError(
            'the numbers of source domain and target domain regions are different')
    if set(multi_gan_source_regions).issubset(training_regions) is False:
        raise ValueError(
            'the source domain regions are not a subset of the training regions')
    for area_idx, (area_gan_ini, area_src_ini) in enumerate(
            zip(multi_gan_regions, multi_gan_source_regions)):
        basic.outputlogMessage('%d: source and target area: %s vs %s' %
                               (area_idx, area_src_ini, area_gan_ini))

    gan_working_dir = parameters.get_string_parameters(gan_para_file,
                                                       'working_root')
    # gan_dir_pre_name = parameters.get_string_parameters(gan_para_file, 'gan_dir_pre_name')
    # use GAN model name as the gan_dir_pre_name
    gan_model = parameters.get_string_parameters(gan_para_file, 'gan_model')
    gan_dir_pre_name = gan_model  # '_' +

    # loop each regions need image translation
    sub_tasks = []
    for area_idx, (area_gan_ini, area_src_ini) in enumerate(
            zip(multi_gan_regions, multi_gan_source_regions)):

        area_ini = os.path.abspath(area_gan_ini)
        area_src_ini = os.path.abspath(area_src_ini)
        area_name = parameters.get_string_parameters(area_ini, 'area_name')
        area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
        area_time = parameters.get_string_parameters(area_ini, 'area_time')

        inf_image_dir = parameters.get_directory(area_ini, 'inf_image_dir')

        # it is OK to treat a file name as a pattern and pass it to the following functions to get the file list
        inf_image_or_pattern = parameters.get_string_parameters(
            area_ini, 'inf_image_or_pattern')

        inf_img_list = io_function.get_file_list_by_pattern(
            inf_image_dir, inf_image_or_pattern)
        img_count = len(inf_img_list)
        if img_count < 1:
            raise ValueError(
                'No image for image translation, please check inf_image_dir and inf_image_or_pattern in %s'
                % area_ini)

        gan_project_save_dir = get_gan_project_save_dir(
            gan_working_dir, gan_dir_pre_name, area_name, area_remark,
            area_time, area_src_ini)

        if os.path.isdir(gan_project_save_dir):
            if generate_image_exists(gan_project_save_dir) is True:
                basic.outputlogMessage(
                    'generated new images (generate.txt_done) already exist for %s, skip'
                    % gan_project_save_dir)
                continue
        else:
            io_function.mkdir(gan_project_save_dir)

        # parallel run image translation for this area
        CUDA_VISIBLE_DEVICES = []
        if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
            CUDA_VISIBLE_DEVICES = [
                int(item.strip())
                for item in os.environ['CUDA_VISIBLE_DEVICES'].split(',')
            ]

        # get a valid GPU
        gpuids = []
        while len(gpuids) < 1:
            # get available GPUs  # https://github.com/anderskm/gputil
            deviceIDs = GPUtil.getAvailable(order='first',
                                            limit=100,
                                            maxLoad=0.5,
                                            maxMemory=0.5,
                                            includeNan=False,
                                            excludeID=[],
                                            excludeUUID=[])
            # only use the ones listed in CUDA_VISIBLE_DEVICES
            if len(CUDA_VISIBLE_DEVICES) > 0:
                deviceIDs = [
                    item for item in deviceIDs if item in CUDA_VISIBLE_DEVICES
                ]
                basic.outputlogMessage('on ' + machine_name +
                                       ', available GPUs:' + str(deviceIDs) +
                                       ', among visible ones:' +
                                       str(CUDA_VISIBLE_DEVICES))
            else:
                basic.outputlogMessage('on ' + machine_name +
                                       ', available GPUs:' + str(deviceIDs))

            if len(deviceIDs) < 1:
                print(datetime.now(),
                      'No available GPUs, will check again in 60 seconds')
                time.sleep(
                    60)  # wait one minute, then check the available GPUs again
                continue
            # use only the first available visible GPU
            gpuids.append(deviceIDs[0])
            basic.outputlogMessage(
                '%d: image translation for %s on GPU %s of %s' %
                (area_idx, area_ini, str(gpuids), machine_name))

        # run image translation
        # pytorch counts the first GPU in CUDA_VISIBLE_DEVICES as device 0, so we need to re-index the gpu ids
        if len(CUDA_VISIBLE_DEVICES) > 0:
            gpuids = [CUDA_VISIBLE_DEVICES.index(id) for id in gpuids]

        sub_process = Process(target=image_translate_train_generate_one_domain,
                              args=(gan_project_save_dir, gan_para_file,
                                    area_src_ini, area_ini, gpuids,
                                    inf_img_list))

        sub_process.start()
        sub_tasks.append(sub_process)

        # wait until image translation has started or 20 minutes have elapsed
        time0 = time.time()
        elapsed_time = time.time() - time0
        while elapsed_time < 20 * 60:
            elapsed_time = time.time() - time0
            if CUT_gan_is_ready_to_train(
                    gan_project_save_dir) is True or sub_process.is_alive(
                    ) is False:
                break
            else:
                time.sleep(5)

        time.sleep(
            10
        )  # wait, allowing time for the GAN process to start and surface any early errors

        if sub_process.exitcode is not None and sub_process.exitcode != 0:
            sys.exit(1)

        basic.close_remove_completed_process(sub_tasks)

    # wait until all the tasks have finished
    while basic.b_all_process_finish(sub_tasks) is False:
        basic.outputlogMessage('waiting for all tasks to finish')
        time.sleep(60)
    basic.check_exitcode_of_process(sub_tasks)
    basic.close_remove_completed_process(sub_tasks)

    save_image_dir = parameters.get_string_parameters(para_file,
                                                      'input_train_dir')
    save_label_dir = parameters.get_string_parameters(para_file,
                                                      'input_label_dir')
    merge_subImages_from_gan(multi_gan_source_regions, multi_gan_regions,
                             gan_working_dir, gan_dir_pre_name, save_image_dir,
                             save_label_dir)

    duration = time.time() - SECONDS
    os.system(
        'echo "$(date): time cost of translating sub images to target domains: %.2f seconds">>time_cost.txt'
        % duration)
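
A condensed sketch of the GPU-selection logic used above: GPUtil.getAvailable reports physical GPU ids, which are intersected with CUDA_VISIBLE_DEVICES and then re-indexed because PyTorch numbers the visible devices from 0 (the environment value, thresholds, and the helper name wait_for_one_gpu are only illustrative):

import os
import time

import GPUtil


def wait_for_one_gpu(check_interval=60):
    # physical ids visible to this process, e.g. CUDA_VISIBLE_DEVICES="2,3" -> [2, 3]
    visible = []
    if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
        visible = [int(item.strip()) for item in os.environ['CUDA_VISIBLE_DEVICES'].split(',')]
    while True:
        device_ids = GPUtil.getAvailable(order='first', limit=100,
                                         maxLoad=0.5, maxMemory=0.5)
        if len(visible) > 0:
            device_ids = [item for item in device_ids if item in visible]
        if len(device_ids) > 0:
            # re-index: physical id 3 becomes logical id 1 when visible == [2, 3]
            return visible.index(device_ids[0]) if len(visible) > 0 else device_ids[0]
        time.sleep(check_interval)  # no free GPU yet, check again later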