def read_para_values(work_dir, para_file, train_output):

    para_path = os.path.join(work_dir, para_file)

    backbone = parameters.get_string_parameters(para_path, 'network_setting_ini')
    train_output['network_setting_ini'].append(backbone)

    net_ini_path = os.path.join(work_dir, backbone)
    lr = parameters.get_digit_parameters(net_ini_path, 'base_learning_rate', 'float')
    train_output['base_learning_rate'].append(lr)

    iter_num = parameters.get_digit_parameters(net_ini_path, 'iteration_num', 'int')
    train_output['iteration_num'].append(iter_num)

    batch_size = parameters.get_digit_parameters(net_ini_path, 'batch_size', 'int')
    train_output['batch_size'].append(batch_size)

    buffer_size = parameters.get_digit_parameters(para_path, 'buffer_size', 'int')
    train_output['buffer_size'].append(buffer_size)

    training_data_per = parameters.get_digit_parameters(para_path, 'training_data_per', 'float')
    train_output['training_data_per'].append(training_data_per)

    data_augmentation = parameters.get_string_parameters(para_path, 'data_augmentation')
    train_output['data_augmentation'].append(data_augmentation)

    data_aug_ignore_classes = parameters.get_string_parameters(para_path, 'data_aug_ignore_classes')
    train_output['data_aug_ignore_classes'].append(data_aug_ignore_classes)

    return True
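
A minimal usage sketch (an illustration added here, not from the source): train_output is expected to map each key to a list, so a collections.defaultdict(list) works well; the directory and ini names below are hypothetical.

from collections import defaultdict

train_output = defaultdict(list)  # each key accumulates one value per work_dir
read_para_values('exp1_dir', 'main_para.ini', train_output)  # hypothetical paths
print(train_output['base_learning_rate'], train_output['batch_size'])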
Example #2
def run_evaluation_main(para_file,
                        b_new_validation_data=False,
                        train_dir=None):

    print("run evaluation")
    SECONDS = time.time()

    gpu_num = 1

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' %
                      (para_file, os.getcwd()))

    network_setting_ini = parameters.get_string_parameters(
        para_file, 'network_setting_ini')
    tf_research_dir = parameters.get_directory_None_if_absence(
        network_setting_ini, 'tf_research_dir')
    print(tf_research_dir)
    if tf_research_dir is None:
        raise ValueError('tf_research_dir is not set in %s' % network_setting_ini)
    if os.path.isdir(tf_research_dir) is False:
        raise ValueError('%s does not exist' % tf_research_dir)
    # sys.path.insert(0, tf_research_dir)
    # sys.path.insert(0, os.path.join(tf_research_dir,'slim'))
    # print(sys.path)
    # need to change PYTHONPATH, otherwise, deeplab cannot be found
    if os.getenv('PYTHONPATH'):
        os.environ['PYTHONPATH'] = os.getenv(
            'PYTHONPATH') + ':' + tf_research_dir + ':' + os.path.join(
                tf_research_dir, 'slim')
    else:
        os.environ['PYTHONPATH'] = tf_research_dir + ':' + os.path.join(
            tf_research_dir, 'slim')
    # os.system('echo $PYTHONPATH ')

    tf1x_python = parameters.get_file_path_parameters(network_setting_ini,
                                                      'tf1x_python')
    deeplab_train.tf1x_python = tf1x_python

    deeplab_dir = os.path.join(tf_research_dir, 'deeplab')
    WORK_DIR = os.getcwd()

    expr_name = parameters.get_string_parameters(para_file, 'expr_name')

    # prepare data for validation
    if b_new_validation_data:
        prepare_data_for_evaluation(para_file)

    run_evaluation(WORK_DIR,
                   deeplab_dir,
                   expr_name,
                   para_file,
                   network_setting_ini,
                   gpu_num,
                   train_dir=train_dir)

    duration = time.time() - SECONDS
    os.system(
        'echo "$(date): time cost of running evaluation: %.2f seconds">>time_cost.txt'
        % duration)
Example #3
def predict_one_image_mmseg(para_file, image_path, img_save_dir, inf_list_file,
                            gpuid, trained_model):
    """ run prediction of one image
    """
    expr_name = parameters.get_string_parameters(para_file, 'expr_name')
    network_ini = parameters.get_string_parameters(para_file,
                                                   'network_setting_ini')
    base_config_file = parameters.get_string_parameters(
        network_ini, 'base_config')
    config_file = osp.basename(
        io_function.get_name_by_adding_tail(base_config_file, expr_name))

    inf_batch_size = parameters.get_digit_parameters(network_ini,
                                                     'inf_batch_size', 'int')

    patch_width = parameters.get_digit_parameters(para_file, 'inf_patch_width',
                                                  'int')
    patch_height = parameters.get_digit_parameters(para_file,
                                                   'inf_patch_height', 'int')
    adj_overlay_x = parameters.get_digit_parameters(para_file,
                                                    'inf_pixel_overlay_x',
                                                    'int')
    adj_overlay_y = parameters.get_digit_parameters(para_file,
                                                    'inf_pixel_overlay_y',
                                                    'int')

    done_indicator = '%s_done' % inf_list_file
    if os.path.isfile(done_indicator):
        basic.outputlogMessage('warning, %s exists, skip prediction' %
                               done_indicator)
        return
    if os.path.isdir(img_save_dir) is False:
        io_function.mkdir(img_save_dir)
    # use a specific GPU for prediction; only run inference on one image
    time0 = time.time()
    if gpuid is None:
        gpuid = 0

    predict_rsImage_mmseg(config_file,
                          trained_model,
                          image_path,
                          img_save_dir,
                          batch_size=inf_batch_size,
                          gpuid=gpuid,
                          tile_width=patch_width,
                          tile_height=patch_height,
                          overlay_x=adj_overlay_x,
                          overlay_y=adj_overlay_y)

    duration = time.time() - time0
    os.system(
        'echo "$(date): time cost of inference for image in %s: %.2f seconds">>"time_cost.txt"'
        % (inf_list_file, duration))
    # write a file to indicate that the prediction is done.
    os.system('echo %s > %s_done' % (inf_list_file, inf_list_file))

    return
Example #4
def split_train_val(para_file):
    print("split data set into training and validation")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' %
                      (para_file, os.getcwd()))

    code_dir = os.path.join(os.path.dirname(sys.argv[0]), '..')
    sys.path.insert(0, code_dir)
    import parameters

    script = os.path.join(code_dir, 'datasets', 'train_test_split.py')

    training_data_per = parameters.get_digit_parameters_None_if_absence(
        para_file, 'training_data_per', 'float')
    train_sample_txt = parameters.get_string_parameters(
        para_file, 'training_sample_list_txt')
    val_sample_txt = parameters.get_string_parameters(
        para_file, 'validation_sample_list_txt')

    list_dir = 'list'
    all_img_list = os.path.join(list_dir, 'trainval.txt')

    # command_string = script + ' -p ' + str(training_data_per) + \
    #                  ' -t ' + train_sample_txt  + \
    #                  ' -v ' + val_sample_txt  + \
    #                  ' --shuffle ' + all_img_list
    # res = os.system(command_string)
    # if res!=0:
    #     sys.exit(1)

    do_shuffle = True
    from datasets.train_test_split import train_test_split_main
    train_test_split_main(all_img_list, training_data_per, do_shuffle,
                          train_sample_txt, val_sample_txt)

    # save brief information of image patches
    img_ext = parameters.get_string_parameters_None_if_absence(
        para_file, 'split_image_format')

    get_image_with_height_list(os.path.join(list_dir, train_sample_txt),
                               img_ext,
                               info_type='training')

    get_image_with_height_list(os.path.join(list_dir, val_sample_txt),
                               img_ext,
                               info_type='validation')

    # save the count of each class in training and validation
    get_sample_count_of_each_class(os.path.join(list_dir, train_sample_txt),
                                   info_type='training')

    get_sample_count_of_each_class(os.path.join(list_dir, val_sample_txt),
                                   info_type='validation')
Example #5
def modify_dataset(cfg, para_file, network_setting_ini, gpu_num):
    datetype = 'RSImagePatches'
    cfg.dataset_type = datetype
    cfg.data_root = './'

    ## There are two more crop_size settings in the pipeline; after choosing the base_config_file, the crop size is already chosen

    # image_crop_size = parameters.get_string_list_parameters(para_file, 'image_crop_size')
    # image_crop_size = [ int(item) for item in image_crop_size]
    # if len(image_crop_size) != 2 and image_crop_size[0].isdigit() and image_crop_size[1].isdigit():
    #     raise ValueError('image_crop_size should be height,width')
    # cfg.crop_size = (image_crop_size[0],image_crop_size[1])

    training_sample_list_txt = parameters.get_string_parameters(
        para_file, 'training_sample_list_txt')
    validation_sample_list_txt = parameters.get_string_parameters(
        para_file, 'validation_sample_list_txt')

    split_list = ['train', 'val', 'test']
    for split in split_list:
        # dataset in train
        cfg.data[split]['type'] = datetype
        cfg.data[split]['data_root'] = './'
        cfg.data[split]['img_dir'] = 'split_images'
        cfg.data[split]['ann_dir'] = 'split_labels'
        if split == 'train':
            cfg.data[split]['split'] = [
                osp.join('list', training_sample_list_txt)
            ]
        else:
            # set both val and test to the validation list; when running the real test (prediction) on entire RS images, test will be set again.
            cfg.data[split]['split'] = [
                osp.join('list', validation_sample_list_txt)
            ]

    # set None for test
    cfg.data['test']['img_dir'] = None
    cfg.data['test']['ann_dir'] = None
    cfg.data['test']['split'] = None

    # setting based on batch size
    batch_size = parameters.get_digit_parameters(network_setting_ini,
                                                 'batch_size', 'int')
    if batch_size % gpu_num != 0:
        raise ValueError('Batch size (%d) cannot be divided by gpu num (%d)' %
                         (batch_size, gpu_num))

    cfg.data['samples_per_gpu'] = int(batch_size / gpu_num)
    cfg.data['workers_per_gpu'] = int(
        batch_size / gpu_num) + 2  # set workers a little higher to utilize the CPU

    return True
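
A quick worked example of the batch-size split above (values illustrative): with batch_size = 16 and gpu_num = 4, samples_per_gpu becomes 16 / 4 = 4 and workers_per_gpu becomes 4 + 2 = 6; batch_size = 16 with gpu_num = 3 raises ValueError because 16 % 3 != 0.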
Example #6
def group_same_area_time_observations(area_ini_files):
    # group the observations with the same area name and time.
    same_area_time_obs_ini = {}
    for area_ini in area_ini_files:
        area_name = parameters.get_string_parameters(area_ini, 'area_name')
        area_time = parameters.get_string_parameters(area_ini, 'area_time')

        area_time_key = area_name + '-' + area_time  # use '-' instead of '_' ('_' has been used in many places)
        if area_time_key not in same_area_time_obs_ini.keys():
            same_area_time_obs_ini[area_time_key] = []
        same_area_time_obs_ini[area_time_key].append(area_ini)

    return same_area_time_obs_ini
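
A hypothetical usage sketch (the ini file names are made up): observations sharing an area name and time end up under one 'area_name-area_time' key.

groups = group_same_area_time_observations(['WR_planet_2020.ini', 'WR_rapideye_2020.ini'])
for key, ini_list in groups.items():
    print(key, len(ini_list))  # e.g. 'WillowRiver-2020', 2 if both inis share area name and time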
Example #7
def extract_sub_images_using_training_polygons(para_file):
    trainImg_dir = parameters.get_string_parameters(para_file,
                                                    'input_train_dir')
    labelImg_dir = parameters.get_string_parameters(para_file,
                                                    'input_label_dir')
    if os.path.isdir(trainImg_dir) and os.path.isdir(labelImg_dir):
        basic.outputlogMessage(
            'warning, sub-image and sub-label folders exist, skip extracting sub-images'
        )
        return
    # extract sub_images based on the training polygons
    # command_string = os.path.join(eo_dir, 'workflow', 'get_sub_images_multi_regions.py') + ' ' + para_file
    # basic.os_system_exit_code(command_string)
    from get_sub_images_multi_regions import get_sub_images_multi_regions
    return get_sub_images_multi_regions(para_file)
Example #8
def split_an_image(para_file, image_path, save_dir, patch_w, patch_h,
                   overlay_x, overlay_y):

    split_format = parameters.get_string_parameters(para_file,
                                                    'split_image_format')
    out_format = 'PNG'  # default is PNG
    if split_format == '.tif':
        out_format = 'GTIFF'
    if split_format == '.jpg':
        out_format = 'JPEG'
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    split_image.split_image(image_path,
                            save_dir,
                            patch_w,
                            patch_h,
                            overlay_x,
                            overlay_y,
                            out_format,
                            pre_name=None,
                            process_num=8)
    # get list
    patch_list = io_function.get_file_list_by_ext(split_format,
                                                  save_dir,
                                                  bsub_folder=False)
    if len(patch_list) < 1:
        print('Warning, no images in %s' % save_dir)
        return None
    list_txt_path = save_dir + '_list.txt'
    io_function.save_list_to_txt(list_txt_path, patch_list)
    return list_txt_path
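
A hedged usage sketch (paths, patch size, and overlay values are illustrative): the returned txt file lists all patches, or None when splitting produced no images.

list_txt = split_an_image('main_para.ini', 'image1.tif', 'image1_patches',
                          480, 480, 160, 160)
if list_txt is not None:
    print('patch list saved to', list_txt)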
Example #9
def generate_image_CUT(python_path, generate_script, gan_para_file, gpu_ids,
                       image_list, save_folder):

    if os.path.isfile('generate.txt_done'):
        basic.outputlogMessage(
            'generation of new images using GAN in %s has completed previously, please remove the files if necessary'
            % os.getcwd())
        return True

    time0 = time.time()
    generate_tile_width = parameters.get_digit_parameters(
        gan_para_file, 'generate_tile_width', 'int')
    generate_tile_height = parameters.get_digit_parameters(
        gan_para_file, 'generate_tile_height', 'int')
    generate_overlay_x = parameters.get_digit_parameters(
        gan_para_file, 'generate_overlay_x', 'int')
    generate_overlay_y = parameters.get_digit_parameters(
        gan_para_file, 'generate_overlay_y', 'int')

    folder = os.path.basename(os.getcwd())
    img_list_txt = 'image_to_generate_list.txt'
    io_function.save_list_to_txt(img_list_txt, image_list)

    command_string = python_path + ' '  +  generate_script \
                + ' --dataset_mode '+'satelliteimage' \
                + ' --model '+ 'generate' \
                + ' --image_A_dir_txt ' + img_list_txt \
                + ' --tile_width ' + str(generate_tile_width) \
                + ' --tile_height ' + str(generate_tile_height) \
                + ' --overlay_x ' + str(generate_overlay_x) \
                + ' --overlay_y ' + str(generate_overlay_y)  \
                + ' --name ' + folder  \
                + ' --results_dir ' + save_folder  \
                + ' --gpu_ids ' + ','.join([str(item) for item in gpu_ids])

    train_max_dataset_size = parameters.get_digit_parameters_None_if_absence(
        gan_para_file, 'gen_max_dataset_size', 'int')
    if train_max_dataset_size is not None:
        command_string += ' --max_dataset_size ' + str(train_max_dataset_size)

    # if it's CycleGAN, we need to assign the A generator
    gan_model = parameters.get_string_parameters(gan_para_file, 'gan_model')
    if gan_model == 'cycle_gan':
        command_string += ' --model_suffix _A '  # from A to B

    # status, result = basic.exec_command_string(command_string)  # this will wait command finished
    # os.system(command_string + "&")  # don't know when it finished
    res = os.system(command_string)  # this works
    # print('command_string deeplab_inf_script: res',res)
    if res != 0:
        sys.exit(1)

    duration = time.time() - time0
    os.system(
        'echo "$(date): time cost of generate images using a GAN : %.2f seconds">>"time_cost.txt"'
        % (duration))
    # write a file to indicate that the process has completed.
    os.system('echo done > generate.txt_done')

    return True
Example #10
def get_train_val_sample_count(work_dir, para_file):

    train_sample_txt = parameters.get_string_parameters(
        para_file, 'training_sample_list_txt')
    val_sample_txt = parameters.get_string_parameters(
        para_file, 'validation_sample_list_txt')
    train_list_txt = os.path.join(work_dir, 'list', train_sample_txt)
    val_list_txt = os.path.join(work_dir, 'list', val_sample_txt)

    train_lines = io_function.read_list_from_txt(train_list_txt)
    val_lines = io_function.read_list_from_txt(val_list_txt)
    basic.outputlogMessage(
        'The counts of training and validation samples are %d and %d' %
        (len(train_lines), len(val_lines)))

    return len(train_lines), len(val_lines)
Example #11
def split_train_val(para_file):

    train_sample_txt = parameters.get_string_parameters(
        para_file, 'training_sample_list_txt')
    val_sample_txt = parameters.get_string_parameters(
        para_file, 'validation_sample_list_txt')
    train_list_txt = os.path.join('list', train_sample_txt)
    val_list_txt = os.path.join('list', val_sample_txt)
    if os.path.isfile(train_list_txt) and os.path.isfile(val_list_txt):
        basic.outputlogMessage(
            'warning, split sample list exists, skip split_train_val')
        return
    # command_string = os.path.join(eo_dir, 'workflow', 'split_train_val.py') + ' ' + para_file
    # basic.os_system_exit_code(command_string)
    import split_train_val
    return split_train_val.split_train_val(para_file)
Example #12
def mmseg_train_main(para_file, gpu_num):
    print(datetime.now(), "train MMSegmentation")
    SECONDS = time.time()

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' %
                      (para_file, os.getcwd()))

    network_setting_ini = parameters.get_string_parameters(
        para_file, 'network_setting_ini')
    mmseg_repo_dir = parameters.get_directory(network_setting_ini,
                                              'mmseg_repo_dir')
    mmseg_config_dir = osp.join(mmseg_repo_dir, 'configs')
    if os.path.isdir(mmseg_config_dir) is False:
        raise ValueError('%s does not exist' % mmseg_config_dir)

    base_config_file = parameters.get_string_parameters(
        network_setting_ini, 'base_config')
    base_config_file = os.path.join(mmseg_config_dir, base_config_file)
    if os.path.isfile(base_config_file) is False:
        raise IOError('%s does not exist' % base_config_file)

    global open_mmlab_python
    open_mmlab_python = parameters.get_file_path_parameters(
        network_setting_ini, 'open-mmlab-python')

    WORK_DIR = os.getcwd()
    expr_name = parameters.get_string_parameters(para_file, 'expr_name')

    # copy the base_config_file, then save it to a new one
    config_file = osp.join(
        WORK_DIR,
        osp.basename(
            io_function.get_name_by_adding_tail(base_config_file, expr_name)))
    if updated_config_file(WORK_DIR, expr_name, base_config_file, config_file,
                           para_file, network_setting_ini, gpu_num) is False:
        raise ValueError('Getting the config file failed')

    train_evaluation_mmseg(WORK_DIR, mmseg_repo_dir, config_file, expr_name,
                           para_file, network_setting_ini, gpu_num)

    duration = time.time() - SECONDS
    os.system(
        'echo "$(date): time cost of training: %.2f seconds">>time_cost.txt' %
        duration)
Example #13
def get_overall_miou_after_training(work_dir, para_file):

    exp_name = parameters.get_string_parameters(para_file, 'expr_name')

    iou_path = os.path.join(work_dir, exp_name, 'eval', 'miou.txt')
    if os.path.isfile(iou_path) is False:
        return False
    overall_miou = get_overall_miou(iou_path)

    return overall_miou
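
A usage note (a sketch, names illustrative): the function returns False when miou.txt is missing and a float otherwise, so callers should compare against False explicitly, since a valid mIoU of 0.0 would also be falsy.

miou = get_overall_miou_after_training(os.getcwd(), 'main_para.ini')
if miou is False:
    print('no evaluation result yet')
else:
    print('overall mIoU: %.4f' % miou)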
Example #14
def check_early_stopping_trained_iteration(work_dir, para_file):
    early_stop = False
    if os.path.isfile(os.path.join(work_dir, 'early_stopping.txt')):
        early_stop = True

    exp_name = parameters.get_string_parameters(
        os.path.join(work_dir, para_file), 'expr_name')
    TRAIN_LOGDIR = os.path.join(work_dir, exp_name, 'train')
    model_trained_iter = deeplab_train.get_trained_iteration(TRAIN_LOGDIR)

    return early_stop, model_trained_iter
Example #15
def main(options, args):
    ref_image = args[0]
    pts_files = args[1]
    warp_image = args[2]

    dem_file = parameters.get_string_parameters('', 'dem_datum_wgs84')
    output = os.path.splitext(warp_image)[0] + '.gcp'

    convert_gcp_format(ref_image, warp_image, dem_file, pts_files, output)

Example #16
def main(options, args):

    para_file = args[0]
    network_setting_ini = parameters.get_string_parameters(
        para_file, 'network_setting_ini')

    global tf1x_python
    tf1x_python = parameters.get_file_path_parameters(network_setting_ini,
                                                      'tf1x_python')

    build_script = os.path.join(code_dir, 'datasets', 'build_TFrecord.py')
    call_build_TFrecord(build_script, para_file)
Example #17
def get_early_stopping_trained_iteration(work_dir, para_file, train_output):
    if os.path.isfile(os.path.join(work_dir, 'early_stopping.txt')):
        train_output['early_stopping'].append('Yes')
    else:
        train_output['early_stopping'].append('No')

    exp_name = parameters.get_string_parameters(
        os.path.join(work_dir, para_file), 'expr_name')
    TRAIN_LOGDIR = os.path.join(work_dir, exp_name, 'train')
    trained_iter = deeplab_train.get_trained_iteration(TRAIN_LOGDIR)
    train_output['model_train_iter'].append(trained_iter)

    return True
Example #18
    def __init__(self, args):
        self.args = args
        # self.directory = os.path.join('run', args.dataset, args.checkname)
        # self.runs = sorted(glob.glob(os.path.join(self.directory, 'experiment_*')))
        # # here is a potential bug, when run_id is 10, then it cannot increase to 11
        # run_id = int(self.runs[-1].split('_')[-1]) + 1 if self.runs else 0
        #
        # self.experiment_dir = os.path.join(self.directory, 'experiment_{}'.format(str(run_id)))

        self.experiment_dir = parameters.get_string_parameters(args.para_file, 'expr_name')

        if not os.path.exists(self.experiment_dir):
            os.makedirs(self.experiment_dir)
Example #19

def copy_subImages_labels_directly(subImage_dir, subLabel_dir, area_ini):

    input_image_dir = parameters.get_directory_None_if_absence(
        area_ini, 'input_image_dir')
    # it is OK to treat a file name as a pattern and pass it to the following functions to get the file list
    input_image_or_pattern = parameters.get_string_parameters(
        area_ini, 'input_image_or_pattern')

    # label raster folder
    label_raster_dir = parameters.get_directory_None_if_absence(
        area_ini, 'label_raster_dir')
    sub_images_list = []
    label_path_list = []

    if os.path.isdir(subImage_dir) is False:
        io_function.mkdir(subImage_dir)
    if os.path.isdir(subLabel_dir) is False:
        io_function.mkdir(subLabel_dir)

    sub_images = io_function.get_file_list_by_pattern(input_image_dir,
                                                      input_image_or_pattern)
    for sub_img in sub_images:
        # find the corresponding label raster
        label_name = io_function.get_name_by_adding_tail(
            os.path.basename(sub_img), 'label')
        label_path = os.path.join(label_raster_dir, label_name)
        if os.path.isfile(label_path):
            sub_images_list.append(sub_img)
            label_path_list.append(label_path)
        else:
            print('Warning, cannot find label for %s in %s' %
                  (sub_img, label_raster_dir))

    # copy sub-images, adding to txt files
    with open('sub_images_labels_list.txt', 'a') as f_obj:
        for tif_path, label_file in zip(sub_images_list, label_path_list):
            if label_file is None:
                continue
            dst_subImg = os.path.join(subImage_dir, os.path.basename(tif_path))

            # copy sub-images
            io_function.copy_file_to_dst(tif_path, dst_subImg, overwrite=True)

            dst_label_file = os.path.join(subLabel_dir,
                                          os.path.basename(label_file))
            io_function.copy_file_to_dst(label_file,
                                         dst_label_file,
                                         overwrite=True)

            sub_image_label_str = dst_subImg + ":" + dst_label_file + '\n'
            f_obj.write(sub_image_label_str)
Example #20
    def load_Planet(self, image_dir, label_dir, subset):
        """Load a subset of the Planet images
        image_dir: The root directory of the Planet images (already split).
        subset: What to load (train, val, minival, valminusminival)
        class_ids: If provided, only loads images that have the given classes.
        """
        # Add classes
        # self.add_class("planet", 1, "thawslump")    # source, class_id, class_name

        self.add_class("planet_TS", 1,
                       "thawslump")  # source, class_id, class_name
        self.add_class("planet_noTS", 2,
                       "thawslump_similar")  # source, class_id, class_name

        # Add images in the image_dir
        if subset == 'train':
            images_list = 'list/train_list.txt'
        elif subset == 'val':
            images_list = 'list/val_list.txt'
        else:
            raise ValueError(
                "'{}' is not recognized. Use 'train' or 'val' ".format(subset))

        # dataset = os.path.basename(dataset_split)[:-4]
        filenames = [x.strip('\n') for x in open(images_list, 'r')]

        img_ext = parameters.get_string_parameters(para_file,
                                                   'split_image_format')
        for i, image_name in enumerate(filenames):
            # source, image_id, path, **kwargs
            image_path = os.path.join(os.path.abspath(image_dir),
                                      image_name + img_ext)
            label_path = os.path.join(os.path.abspath(label_dir),
                                      image_name + img_ext)
            # img_source = 'planet'
            if image_path.find('class_1') > 0:
                img_source = 'planet_TS'
            elif image_path.find('class_2') > 0:
                img_source = 'planet_noTS'
            else:
                raise ValueError('unknown class in file name: %s' % image_name)
            self.add_image(img_source,
                           image_id=i,
                           path=image_path,
                           label_path=label_path,
                           patch=image_name)
Example #21
def objective_overall_miou(lr, iter_num, batch_size, backbone, buffer_size,
                           training_data_per, data_augmentation,
                           data_aug_ignore_classes):

    sys.path.insert(0, code_dir)
    sys.path.insert(0, os.path.join(code_dir, 'workflow'))  # for some modules in the workflow folder
    import basic_src.io_function as io_function
    import parameters
    import workflow.whole_procedure as whole_procedure

    para_file = 'main_para_exp9.ini'
    work_dir = os.getcwd()

    # create a training folder
    copy_ini_files(ini_dir, work_dir, para_file, area_ini_list, backbone)

    exp_name = parameters.get_string_parameters(para_file, 'expr_name')

    # change para_file
    modify_parameter(os.path.join(work_dir, para_file), 'network_setting_ini', backbone)
    modify_parameter(os.path.join(work_dir, backbone), 'base_learning_rate', lr)
    modify_parameter(os.path.join(work_dir, backbone), 'batch_size', batch_size)
    modify_parameter(os.path.join(work_dir, backbone), 'iteration_num', iter_num)

    modify_parameter(os.path.join(work_dir, para_file), 'buffer_size', buffer_size)
    modify_parameter(os.path.join(work_dir, para_file), 'training_data_per', training_data_per)
    modify_parameter(os.path.join(work_dir, para_file), 'data_augmentation', data_augmentation)
    modify_parameter(os.path.join(work_dir, para_file), 'data_aug_ignore_classes', data_aug_ignore_classes)

    # run training
    whole_procedure.run_whole_procedure(para_file, b_train_only=True)

    # remove files to save storage
    os.system('rm -rf %s/exp*/init_models' % work_dir)
    os.system('rm -rf %s/exp*/eval/events.out.tfevents*' % work_dir)  # don't remove miou.txt
    # os.system('rm -rf %s/exp*/train' % work_dir)   # don't remove the train folder
    os.system('rm -rf %s/exp*/vis' % work_dir)  # don't remove the export folder (for prediction)

    os.system('rm -rf %s/multi_inf_results' % work_dir)
    os.system('rm -rf %s/split*' % work_dir)
    os.system('rm -rf %s/sub*s' % work_dir)  # only subImages and subLabels
    os.system('rm -rf %s/sub*s_delete' % work_dir)  # only subImages_delete and subLabels_delete
    os.system('rm -rf %s/tfrecord*' % work_dir)

    iou_path = os.path.join(work_dir, exp_name, 'eval', 'miou.txt')
    overall_miou = get_overall_miou(iou_path)
    return overall_miou
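
A sketch of how such an objective might be driven by a plain grid search (the hyperparameter values and backbone ini name are illustrative assumptions; a real study would more likely hand this function to an optimization library):

for lr in [0.007, 0.014]:
    for batch_size in [8, 16]:
        miou = objective_overall_miou(lr, 30000, batch_size,
                                      'deeplabv3plus_xception65.ini',  # hypothetical backbone ini
                                      300, 0.9, 'blur,crop,scale', '')
        print(lr, batch_size, miou)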
Example #22
def set_pythonpath(para_file):

    network_ini = parameters.get_string_parameters(para_file,
                                                   'network_setting_ini')
    mmseg_repo_dir = parameters.get_directory(network_ini, 'mmseg_repo_dir')
    mmseg_code_dir = osp.join(mmseg_repo_dir, 'mmseg')

    if os.path.isdir(mmseg_code_dir) is False:
        raise ValueError('%s does not exist' % mmseg_code_dir)

    # set PYTHONPATH to use my modified version of mmseg
    if os.getenv('PYTHONPATH'):
        os.environ['PYTHONPATH'] = os.getenv(
            'PYTHONPATH') + ':' + mmseg_code_dir
    else:
        os.environ['PYTHONPATH'] = mmseg_code_dir
    print('\nPYTHONPATH is: ', os.getenv('PYTHONPATH'))
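
A hedged usage sketch (the para file name and training command are illustrative): call set_pythonpath once in the parent process, and any subprocess launched afterwards inherits the modified PYTHONPATH.

set_pythonpath('main_para.ini')
os.system('python tools/train.py work_config.py')  # hypothetical mmseg command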
Example #23

def get_miou_of_overall_and_class_1_step(work_dir, para_file, train_output):

    exp_name = parameters.get_string_parameters(
        os.path.join(work_dir, para_file), 'expr_name')
    miou_path = os.path.join(work_dir, exp_name, 'eval', 'miou.txt')
    if os.path.isfile(miou_path) is False:
        print("warning, no miou.txt in %s" % work_dir)
        train_output['class_1'].append(0)
        train_output['overall'].append(0)
        train_output['step'].append(0)
        return False

    iou_dict = io_function.read_dict_from_txt_json(miou_path)
    train_output['class_1'].append(iou_dict['class_1'][-1])
    train_output['overall'].append(iou_dict['overall'][-1])
    train_output['step'].append(iou_dict['step'][-1])

    return True
Example #24
def objective_overall_miou(data_augmentation):

    sys.path.insert(0, code_dir)
    sys.path.insert(0, os.path.join(
        code_dir, 'workflow'))  # for some modules in the workflow folder
    import basic_src.io_function as io_function
    import parameters
    import workflow.whole_procedure as whole_procedure

    para_file = 'main_para_dataAug.ini'
    work_dir = os.getcwd()

    # create a training folder
    copy_data_ini_exe_files(data_ini_dir, work_dir)

    exp_name = parameters.get_string_parameters(para_file, 'expr_name')

    # change para_file
    modify_parameter(os.path.join(work_dir, para_file), 'data_augmentation',
                     data_augmentation)

    # run training
    # whole_procedure.run_whole_procedure(para_file, b_train_only=True)
    res = os.system('./exe_tesia.sh')
    if res != 0:
        sys.exit(1)

    # remove files to save storage
    os.system('rm -rf %s/exp*/init_models' % work_dir)
    os.system('rm -rf %s/exp*/eval/events.out.tfevents*' %
              work_dir)  # don't remove miou.txt
    # os.system('rm -rf %s/exp*/train'%work_dir)            # don't remove train folder
    os.system('rm -rf %s/exp*/vis' %
              work_dir)  # don't remove the export folder (for prediction)

    os.system('rm -rf %s/multi_inf_results' % work_dir)
    os.system('rm -rf %s/split*' % work_dir)
    os.system('rm -rf %s/sub*s' % work_dir)  # only subImages and subLabels
    os.system('rm -rf %s/sub*s_delete' %
              work_dir)  # only subImages_delete and subLabels_delete
    os.system('rm -rf %s/tfrecord*' % work_dir)

    iou_path = os.path.join(work_dir, exp_name, 'eval', 'miou.txt')
    overall_miou = get_overall_miou(iou_path)
    return overall_miou
Example #25
def test_dataloader():
    # test, run in ~/Data/tmp_data/test_mmsegmentation/test_landuse_dl
    para_file = 'main_para.ini'
    set_pythonpath(para_file)
    expr_name = parameters.get_string_parameters(para_file, 'expr_name')
    trained_model = '%s/latest.pth' % expr_name

    # test rgb, using rgb in Willow River
    # img_idx = 0
    # image_path = os.path.expanduser('~/Data/Arctic/canada_arctic/Willow_River/Planet2020/20200818_mosaic_8bit_rgb.tif')
    # img_save_dir = os.path.join('predict_output','I%d' % img_idx)
    # io_function.mkdir(img_save_dir)
    # inf_list_file = os.path.join('predict_output','%d.txt'%img_idx)
    # gpuid = None
    # predict_one_image_mmseg(para_file, image_path, img_save_dir, inf_list_file, gpuid, trained_model)
    #
    # # curr_dir,img_idx, area_save_dir, test_id
    # curr_dir = os.getcwd()
    # inf_results_to_shapefile(curr_dir,img_idx,'predict_output','1')

    ############ test nirGB, using rgb in Willow River
    # img_idx = 1
    # image_path = os.path.expanduser('~/Data/Arctic/canada_arctic/Willow_River/Planet2020/20200818_mosaic_8bit_nirGB.tif')
    # img_save_dir = os.path.join('predict_output','I%d' % img_idx)
    # io_function.mkdir(img_save_dir)
    # inf_list_file = os.path.join('predict_output','%d.txt'%img_idx)
    # gpuid = None
    #
    # predict_one_image_mmseg(para_file, image_path, img_save_dir, inf_list_file, gpuid, trained_model)
    #
    # # curr_dir,img_idx, area_save_dir, test_id
    # curr_dir = os.getcwd()
    # inf_results_to_shapefile(curr_dir,img_idx,'predict_output','1')

    ######### try a tiny image
    img_idx = 2
    image_path = os.path.expanduser(
        '~/Data/test_mmsegmentation/test_landuse_dl/WR_nirGB_sub_images/20200818_mosaic_8bit_nirGB_0_class_1.tif'
    )
    img_save_dir = os.path.join('predict_output', 'I%d' % img_idx)
    io_function.mkdir(img_save_dir)
    inf_list_file = os.path.join('predict_output', '%d.txt' % img_idx)
    gpuid = None
    predict_one_image_mmseg(para_file, image_path, img_save_dir, inf_list_file,
                            gpuid, trained_model)
Example #26
def prepare_data_for_evaluation(para_file):

    import workflow.whole_procedure as whole_procedure

    # get subimages
    whole_procedure.extract_sub_images_using_training_polygons(para_file)

    # split image
    whole_procedure.split_sub_images(para_file)

    # whole_procedure.training_img_augment(para_file)
    # whole_procedure.split_train_val(para_file)

    # use all the image patches for evaluation
    trainval = os.path.join('list', 'trainval.txt')
    test_list_txt = parameters.get_string_parameters(
        para_file, 'validation_sample_list_txt')
    test_list_txt = os.path.join('list', test_list_txt)
    io_function.copy_file_to_dst(trainval, test_list_txt, overwrite=True)

    # convert images to TFRecords
    whole_procedure.build_TFrecord_tf1x(para_file)
Example #27
def predict_one_image_yolo(para_file, image_path, img_save_dir, inf_list_file,
                           gpuid, trained_model):

    config_file = parameters.get_string_parameters(
        para_file, 'network_setting_ini')  # 'yolov4_obj.cfg'
    yolo_data = os.path.join('data', 'obj.data')
    # b_python_api = False
    inf_batch_size = parameters.get_digit_parameters(para_file,
                                                     'inf_batch_size', 'int')
    b_python_api = parameters.get_bool_parameters(para_file,
                                                  'b_inf_use_python_api')

    done_indicator = '%s_done' % inf_list_file
    if os.path.isfile(done_indicator):
        basic.outputlogMessage('warning, %s exists, skip prediction' %
                               done_indicator)
        return
    # use a specific GPU for prediction; only run inference on one image
    time0 = time.time()
    if gpuid is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpuid)

    predict_remoteSensing_image(para_file,
                                image_path,
                                img_save_dir,
                                trained_model,
                                config_file,
                                yolo_data,
                                batch_size=inf_batch_size,
                                b_python_api=b_python_api)

    duration = time.time() - time0
    os.system(
        'echo "$(date): time cost of inference for image in %s: %.2f seconds">>"time_cost.txt"'
        % (inf_list_file, duration))
    # write a file to indicate that the prediction is done.
    os.system('echo %s > %s_done' % (inf_list_file, inf_list_file))
    return
Example #28
def main(options, args):

    time_str = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    print2file(log, time_str)

    para_file = options.para_file
    k_value = int(args[0])
    test_num = int(args[1])

    print2file(log, 'k_value : %d' % k_value)
    print2file(log, 'test_num : %d' % test_num)

    global trained_model_dir
    trained_model_dir = parameters.get_string_parameters(
        para_file, 'expr_name')

    # get the path of multi training polygons
    multi_training_files = parameters.get_string_parameters_None_if_absence(
        para_file, 'multi_training_files')
    if multi_training_files is None:
        raise ValueError('multi_training_files is not set in %s' %
                         para_file)

    io_function.is_file_exist(multi_training_files)

    # backup the original training file, which contains the full set of polygons
    training_files_allPolygons = io_function.get_name_by_adding_tail(
        multi_training_files, 'allPolygons')
    if os.path.isfile(training_files_allPolygons) is False:
        io_function.copy_file_to_dst(multi_training_files,
                                     training_files_allPolygons)
    else:
        basic.outputlogMessage(
            'The full set of polygons already exists')

    # run training using the k-fold subsets
    train_kfold_cross_val(training_files_allPolygons, multi_training_files,
                          k_value, test_num)
Example #29
def train_evaluation_deeplab_separate(WORK_DIR, deeplab_dir, expr_name,
                                      para_file, network_setting_ini, gpu_num):
    '''
    In "train_evaluation_deeplab", we run training, stop, run evaluation, then train again; this makes the learning rate behave strangely and the results worse.
    So in this function, we start two processes: one for training and another for evaluation (run on the CPU).
    '''
    # prepare training folder
    EXP_FOLDER = expr_name
    INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models')
    TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train')
    EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval')
    VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis')
    EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export')

    io_function.mkdir(INIT_FOLDER)
    io_function.mkdir(TRAIN_LOGDIR)
    io_function.mkdir(EVAL_LOGDIR)
    io_function.mkdir(VIS_LOGDIR)
    io_function.mkdir(EXPORT_DIR)

    # prepare the tensorflow check point (pretrained model) for training
    pre_trained_dir = parameters.get_directory_None_if_absence(
        network_setting_ini, 'pre_trained_model_folder')
    pre_trained_tar = parameters.get_string_parameters(network_setting_ini,
                                                       'TF_INIT_CKPT')
    pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar)
    if os.path.isfile(pre_trained_path) is False:
        print('pre-trained model: %s does not exist, trying to download' %
              pre_trained_path)
        # try to download the file
        pre_trained_url = parameters.get_string_parameters_None_if_absence(
            network_setting_ini, 'pre_trained_model_url')
        res = os.system('wget %s ' % pre_trained_url)
        if res != 0:
            sys.exit(1)
        io_function.movefiletodir(pre_trained_tar, pre_trained_dir)

    # unpack pre-trained model to INIT_FOLDER
    os.chdir(INIT_FOLDER)
    res = os.system('tar -xf %s' % pre_trained_path)
    if res != 0:
        raise IOError('failed to unpack %s' % pre_trained_path)
    os.chdir(WORK_DIR)

    dataset_dir = os.path.join(WORK_DIR, 'tfrecord')
    batch_size = parameters.get_digit_parameters(network_setting_ini,
                                                 'batch_size', 'int')
    # maximum iteration number
    iteration_num = parameters.get_digit_parameters(network_setting_ini,
                                                    'iteration_num', 'int')
    base_learning_rate = parameters.get_digit_parameters(
        network_setting_ini, 'base_learning_rate', 'float')

    train_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_output_stride', 'int')
    train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates1', 'int')
    train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates2', 'int')
    train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates3', 'int')

    inf_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_output_stride', 'int')
    inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates1', 'int')
    inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates2', 'int')
    inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates3', 'int')

    # depth_multiplier default is 1.0.
    depth_multiplier = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'depth_multiplier', 'float')

    decoder_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'decoder_output_stride', 'int')
    aspp_convs_filters = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'aspp_convs_filters', 'int')

    train_script = os.path.join(deeplab_dir, 'train.py')
    train_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'training_sample_list_txt'))[0]
    model_variant = parameters.get_string_parameters(network_setting_ini,
                                                     'model_variant')
    checkpoint = parameters.get_string_parameters(network_setting_ini,
                                                  'tf_initial_checkpoint')
    init_checkpoint_files = io_function.get_file_list_by_pattern(
        INIT_FOLDER, checkpoint + '*')
    if len(init_checkpoint_files) < 1:
        raise IOError('No initial checkpoint in %s with pattern: %s' %
                      (INIT_FOLDER, checkpoint))
    init_checkpoint = os.path.join(INIT_FOLDER, checkpoint)
    b_early_stopping = parameters.get_bool_parameters(para_file,
                                                      'b_early_stopping')
    b_initialize_last_layer = parameters.get_bool_parameters(
        para_file, 'b_initialize_last_layer')

    dataset = parameters.get_string_parameters(para_file, 'dataset_name')
    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(
        para_file, 'NUM_CLASSES_noBG', 'int')
    assert num_classes_noBG is not None
    if b_initialize_last_layer is True:
        if pre_trained_tar in pre_trained_tar_21_classes:
            print(
                'warning, pretrained model %s is trained with 21 classes, set num_of_classes to 21'
                % pre_trained_tar)
            num_classes_noBG = 20
        if pre_trained_tar in pre_trained_tar_19_classes:
            print(
                'warning, pretrained model %s is trained with 19 classes, set num_of_classes to 19'
                % pre_trained_tar)
            num_classes_noBG = 18
    num_of_classes = num_classes_noBG + 1

    image_crop_size = parameters.get_string_list_parameters(
        para_file, 'image_crop_size')
    if len(image_crop_size) != 2 or not (
            image_crop_size[0].isdigit() and image_crop_size[1].isdigit()):
        raise ValueError('image_crop_size should be height,width')
    crop_size_str = ','.join(image_crop_size)

    # validation interval (epoch)
    # validation_interval = parameters.get_digit_parameters_None_if_absence(para_file,'validation_interval','int')

    train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file)
    iter_per_epoch = math.ceil(train_count / batch_size)
    total_epoches = math.ceil(iteration_num / iter_per_epoch)
    already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
    if already_trained_iteration >= iteration_num:
        basic.outputlogMessage('Training already run %d iterations, skip' %
                               already_trained_iteration)
        return True

    save_interval_secs = 1200  # default is 1200 seconds for saving the model
    save_summaries_secs = 600  # default is 600 seconds for saving summaries
    eval_interval_secs = save_interval_secs  # eval default is 300 seconds; if there is no newly saved model, there is no need to run evaluation

    train_process = Process(
        target=train_deeplab,
        args=(train_script, dataset, train_split, num_of_classes,
              base_learning_rate, model_variant, init_checkpoint, TRAIN_LOGDIR,
              dataset_dir, gpu_num, train_atrous_rates1, train_atrous_rates2,
              train_atrous_rates3, train_output_stride, crop_size_str,
              batch_size, iteration_num, depth_multiplier,
              decoder_output_stride, aspp_convs_filters,
              b_initialize_last_layer))
    train_process.start()
    time.sleep(60)  # wait
    if train_process.exitcode is not None and train_process.exitcode != 0:
        sys.exit(1)

    # eval_process.start()
    # time.sleep(10)  # wait
    # if eval_process.exitcode is not None and eval_process.exitcode != 0:
    #     sys.exit(1)

    while True:

        # only run evaluation when there is a newly trained model
        already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        basic.outputlogMessage(
            'Already trained iteration: %d, latest evaluation at %d step' %
            (already_trained_iteration, miou_dict['step'][-1]))
        if already_trained_iteration > miou_dict['step'][-1]:

            # run evaluation and wait until it finished
            gpuid = ""  # set gpuid to empty string, making evaluation run on CPU
            evl_script = os.path.join(deeplab_dir, 'eval.py')
            evl_split = os.path.splitext(
                parameters.get_string_parameters(
                    para_file, 'validation_sample_list_txt'))[0]
            # max_eva_number = -1  # run as many evaluation as possible, --eval_interval_secs (default is 300 seconds)
            max_eva_number = 1  # only run once inside the while loop; the while loop controls repeated evaluations
            eval_process = Process(
                target=evaluation_deeplab,
                args=(evl_script, dataset, evl_split, num_of_classes,
                      model_variant, inf_atrous_rates1, inf_atrous_rates2,
                      inf_atrous_rates3, inf_output_stride, TRAIN_LOGDIR,
                      EVAL_LOGDIR, dataset_dir, crop_size_str, max_eva_number,
                      depth_multiplier, decoder_output_stride,
                      aspp_convs_filters, gpuid, eval_interval_secs))
            eval_process.start()  # create the Process inside the while loop to avoid "AssertionError: cannot start a process twice"
            while eval_process.is_alive():
                time.sleep(5)

        # check whether early stopping is needed
        if b_early_stopping:
            print(datetime.now(), 'check early stopping')
            miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
            if 'overall' in miou_dict.keys() and len(
                    miou_dict['overall']) >= 5:
                # if the last five miou did not improve, then stop training
                if np.all(np.diff(miou_dict['overall'][-5:]) <
                          0.005):  # all improvements below 0.005 (0.5%)
                    basic.outputlogMessage(
                        'early stopping: stop training because overall miou did not improve in the last five evaluations'
                    )
                    output_early_stopping_message(TRAIN_LOGDIR)

                    # train_process.kill()    # this one seems not working
                    # subprocess pid different from ps output
                    # https://stackoverflow.com/questions/4444141/subprocess-pid-different-from-ps-output
                    # os.system('kill ' + str(train_process.pid)) # still not working.  train_process.pid is not the one output by ps -aux

                    # train_process.terminate()   # Note that descendant processes of the process will not be terminated
                    # train_process.join()        # Wait until child process terminates

                    with open('train_py_pid.txt', 'r') as f_obj:
                        lines = f_obj.readlines()
                        train_pid = int(lines[0].strip())
                        os.system('kill ' + str(train_pid))
                        basic.outputlogMessage(
                            'killed the training process with id: %d' % train_pid)

                    break  # this breaks the while loop, so some newly saved models may not be evaluated.

        # if the latest evaluation step is behind the saved model iteration, run another evaluation immediately
        already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        if already_trained_iteration > miou_dict['step'][-1]:
            continue

        # if finished training
        if train_process.is_alive() is False:
            break
        # # if eval_process exit, then quit training as well
        # if eval_process.is_alive() is False and train_process.is_alive():
        #     train_process.kill()
        #     break
        time.sleep(eval_interval_secs)  # wait for next evaluation

    # save loss value to disk
    get_loss_learning_rate_list(TRAIN_LOGDIR)
    # get miou again
    miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)

    # eval_process did not exit as expected, kill it again.
    # os.system('kill ' + str(eval_process.pid))

    # get iou and backup
    iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt')
    loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt')
    patch_info = os.path.join(WORK_DIR, 'sub_images_patches_info.txt')

    # backup miou and training_loss & learning rate
    test_id = os.path.basename(WORK_DIR) + '_' + expr_name
    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    if os.path.isdir(backup_dir) is False:
        io_function.mkdir(backup_dir)
    new_iou_name = os.path.join(backup_dir,
                                test_id + '_' + os.path.basename(iou_path))
    io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True)

    loss_new_name = os.path.join(backup_dir,
                                 test_id + '_' + os.path.basename(loss_path))
    io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True)

    new_patch_info = os.path.join(backup_dir,
                                  test_id + '_' + os.path.basename(patch_info))
    io_function.copy_file_to_dst(patch_info, new_patch_info, overwrite=True)

    # plot mIOU, loss, and learning rate curves, and back them up
    miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        iou_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        loss_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    miou_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(miou_curve_path))
    io_function.copy_file_to_dst(miou_curve_path,
                                 miou_curve_bakname,
                                 overwrite=True)
    loss_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(loss_curve_path))
    io_function.copy_file_to_dst(loss_curve_path,
                                 loss_curve_bakname,
                                 overwrite=True)
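
A standalone sketch of the early-stopping rule used in the loop above (pure numpy; the mIoU history values are made up): training stops when none of the differences among the last five evaluations exceeds 0.005 (0.5%).

import numpy as np

miou_history = [0.610, 0.612, 0.613, 0.6135, 0.614, 0.6142]
should_stop = len(miou_history) >= 5 and bool(
    np.all(np.diff(miou_history[-5:]) < 0.005))
print(should_stop)  # True: all four deltas among the last five values are below 0.005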
Example #30
def train_evaluation_deeplab(WORK_DIR, deeplab_dir, expr_name, para_file,
                             network_setting_ini, gpu_num):

    # prepare training folder
    EXP_FOLDER = expr_name
    INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models')
    TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train')
    EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval')
    VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis')
    EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export')

    io_function.mkdir(INIT_FOLDER)
    io_function.mkdir(TRAIN_LOGDIR)
    io_function.mkdir(EVAL_LOGDIR)
    io_function.mkdir(VIS_LOGDIR)
    io_function.mkdir(EXPORT_DIR)

    # prepare the tensorflow check point (pretrained model) for training
    pre_trained_dir = parameters.get_directory_None_if_absence(
        network_setting_ini, 'pre_trained_model_folder')
    pre_trained_tar = parameters.get_string_parameters(network_setting_ini,
                                                       'TF_INIT_CKPT')
    pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar)
    if os.path.isfile(pre_trained_path) is False:
        print('pre-trained model: %s does not exist, trying to download' %
              pre_trained_path)
        # try to download the file
        pre_trained_url = parameters.get_string_parameters_None_if_absence(
            network_setting_ini, 'pre_trained_model_url')
        res = os.system('wget %s ' % pre_trained_url)
        if res != 0:
            sys.exit(1)
        io_function.movefiletodir(pre_trained_tar, pre_trained_dir)

    # unpack pre-trained model to INIT_FOLDER
    os.chdir(INIT_FOLDER)
    res = os.system('tar -xf %s' % pre_trained_path)
    if res != 0:
        raise IOError('failed to unpack %s' % pre_trained_path)
    os.chdir(WORK_DIR)

    dataset_dir = os.path.join(WORK_DIR, 'tfrecord')
    batch_size = parameters.get_digit_parameters(network_setting_ini,
                                                 'batch_size', 'int')
    # maximum iteration number
    iteration_num = parameters.get_digit_parameters(network_setting_ini,
                                                    'iteration_num', 'int')
    base_learning_rate = parameters.get_digit_parameters(
        network_setting_ini, 'base_learning_rate', 'float')

    train_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_output_stride', 'int')
    train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates1', 'int')
    train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates2', 'int')
    train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates3', 'int')

    inf_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_output_stride', 'int')
    inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates1', 'int')
    inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates2', 'int')
    inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates3', 'int')

    # depth_multiplier default is 1.0.
    depth_multiplier = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'depth_multiplier', 'float')

    decoder_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'decoder_output_stride', 'int')
    aspp_convs_filters = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'aspp_convs_filters', 'int')

    train_script = os.path.join(deeplab_dir, 'train.py')
    train_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'training_sample_list_txt'))[0]
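    # the split name is the sample list file name without its extension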
    model_variant = parameters.get_string_parameters(network_setting_ini,
                                                     'model_variant')
    checkpoint = parameters.get_string_parameters(network_setting_ini,
                                                  'tf_initial_checkpoint')
    init_checkpoint_files = io_function.get_file_list_by_pattern(
        INIT_FOLDER, checkpoint + '*')
    if len(init_checkpoint_files) < 1:
        raise IOError('No initial checkpoint in %s with pattern: %s' %
                      (INIT_FOLDER, checkpoint))
    init_checkpoint = os.path.join(INIT_FOLDER, checkpoint)
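    # init_checkpoint is a TensorFlow checkpoint prefix (matched above with
    # 'prefix*'), not a single file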
    b_early_stopping = parameters.get_bool_parameters(para_file,
                                                      'b_early_stopping')
    b_initialize_last_layer = parameters.get_bool_parameters(
        para_file, 'b_initialize_last_layer')

    dataset = parameters.get_string_parameters(para_file, 'dataset_name')
    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(
        para_file, 'NUM_CLASSES_noBG', 'int')
    assert num_classes_noBG is not None
    if b_initialize_last_layer is True:
        if pre_trained_tar in pre_trained_tar_21_classes:
            print(
                'warning: pretrained model %s was trained with 21 classes, setting num_of_classes to 21'
                % pre_trained_tar)
            num_classes_noBG = 20
        if pre_trained_tar in pre_trained_tar_19_classes:
            print(
                'warning: pretrained model %s was trained with 19 classes, setting num_of_classes to 19'
                % pre_trained_tar)
            num_classes_noBG = 18
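    # total number of classes = non-background classes + 1 (background)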
    num_of_classes = num_classes_noBG + 1

    image_crop_size = parameters.get_string_list_parameters(
        para_file, 'image_crop_size')
    if len(image_crop_size) != 2 or not image_crop_size[0].isdigit() \
            or not image_crop_size[1].isdigit():
        raise ValueError('image_crop_size should be two integers: height,width')
    crop_size_str = ','.join(image_crop_size)
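    # the DeepLab scripts take the crop size as a single 'height,width' string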

    evl_script = os.path.join(deeplab_dir, 'eval.py')
    evl_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'validation_sample_list_txt'))[0]
    max_eva_number = 1
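    # assumption: eval.py keeps waiting for new checkpoints by default, so the
    # number of evaluations is capped at 1 to make each call return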

    # validation interval (in epochs)
    validation_interval = parameters.get_digit_parameters_None_if_absence(
        para_file, 'validation_interval', 'int')
    train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file)
    iter_per_epoch = math.ceil(train_count / batch_size)
    total_epochs = math.ceil(iteration_num / iter_per_epoch)
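    # one epoch is ceil(train_count / batch_size) iterations, so the iteration
    # budget is converted to whole epochs for the validation schedule below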
    already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
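    # if a previous run already reached the iteration budget, nothing is left to do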
    if already_trained_iteration >= iteration_num:
        basic.outputlogMessage('Training already ran %d iterations, skipping' %
                               already_trained_iteration)
        return True
    if validation_interval is None:
        basic.outputlogMessage(
            'no validation_interval given, so training to %d iterations, then evaluating at the end'
            % iteration_num)
        # run training
        train_deeplab(train_script, dataset, train_split, num_of_classes,
                      base_learning_rate, model_variant, init_checkpoint,
                      TRAIN_LOGDIR, dataset_dir, gpu_num, train_atrous_rates1,
                      train_atrous_rates2, train_atrous_rates3,
                      train_output_stride, crop_size_str, batch_size,
                      iteration_num, depth_multiplier, decoder_output_stride,
                      aspp_convs_filters, b_initialize_last_layer)

        # run evaluation
        evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes,
                           model_variant, inf_atrous_rates1, inf_atrous_rates2,
                           inf_atrous_rates3, inf_output_stride, TRAIN_LOGDIR,
                           EVAL_LOGDIR, dataset_dir, crop_size_str,
                           max_eva_number, depth_multiplier,
                           decoder_output_stride, aspp_convs_filters)
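        # extract the mIOU and loss / learning-rate histories for the plots at the end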
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        get_loss_learning_rate_list(TRAIN_LOGDIR)
    else:
        basic.outputlogMessage(
            'training to the maximum iteration of %d, and evaluating every %d epoch(s)'
            % (iteration_num, validation_interval))
        for epoch in range(validation_interval,
                           total_epochs + validation_interval,
                           validation_interval):

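            # train to the end of this chunk of epochs, but never beyond the
            # overall iteration budget; chunks already covered by a previous
            # run are skipped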
            to_iter_num = min(epoch * iter_per_epoch, iteration_num)
            if to_iter_num <= already_trained_iteration:
                continue
            basic.outputlogMessage(
                'training and evaluating to %d epochs (to iteration: %d)' %
                (epoch, to_iter_num))

            # run training
            train_deeplab(train_script, dataset, train_split, num_of_classes,
                          base_learning_rate, model_variant, init_checkpoint,
                          TRAIN_LOGDIR, dataset_dir, gpu_num,
                          train_atrous_rates1, train_atrous_rates2,
                          train_atrous_rates3, train_output_stride,
                          crop_size_str, batch_size, to_iter_num,
                          depth_multiplier, decoder_output_stride,
                          aspp_convs_filters, b_initialize_last_layer)

            # run evaluation
            evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes,
                               model_variant, inf_atrous_rates1,
                               inf_atrous_rates2, inf_atrous_rates3,
                               inf_output_stride, TRAIN_LOGDIR, EVAL_LOGDIR,
                               dataset_dir, crop_size_str, max_eva_number,
                               depth_multiplier, decoder_output_stride,
                               aspp_convs_filters)

            # get miou
            miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
            # save loss value to disk
            get_loss_learning_rate_list(TRAIN_LOGDIR)
            # check whether early stopping should kick in
            if b_early_stopping:
                if len(miou_dict['overall']) >= 5:
                    # stop if none of the last five overall mIOU values
                    # improved by more than 0.005 (i.e. 0.5%)
                    if np.all(np.diff(miou_dict['overall'][-5:]) < 0.005):
                        basic.outputlogMessage(
                            'early stopping: stop training because overall miou did not improve in the last five evaluations'
                        )
                        output_early_stopping_message(TRAIN_LOGDIR)
                        break

    # plot mIOU, loss, and learning rate curves
    iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt')
    loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt')
    miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        iou_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        loss_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)

    # back up the miou, training loss, and learning rate files
    test_id = os.path.basename(WORK_DIR) + '_' + expr_name
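    # prefix the backups with <working-folder>_<experiment-name> so results
    # from different runs do not overwrite each other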
    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    if os.path.isdir(backup_dir) is False:
        io_function.mkdir(backup_dir)

    new_iou_name = os.path.join(backup_dir,
                                test_id + '_' + os.path.basename(iou_path))
    io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True)
    miou_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(miou_curve_path))
    io_function.copy_file_to_dst(miou_curve_path,
                                 miou_curve_bakname,
                                 overwrite=True)

    loss_new_name = os.path.join(backup_dir,
                                 test_id + '_' + os.path.basename(loss_path))
    io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True)
    loss_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(loss_curve_path))
    io_function.copy_file_to_dst(loss_curve_path,
                                 loss_curve_bakname,
                                 overwrite=True)