Example #1
def inference(para_file):
    outdir = parameters.get_directory(para_file, 'inf_output_dir')
    # don't remove it automatically
    # if os.path.isdir(outdir):
    #     io_function.delete_file_or_dir(outdir)
    # the script will check whether each image has been predicted
    command_string = os.path.join(eo_dir, 'workflow',
                                  'parallel_prediction.py') + ' ' + para_file
    basic.os_system_exit_code(command_string)
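
A minimal usage sketch; 'main_para.ini' is a hypothetical parameter-file name, and eo_dir is assumed to be set elsewhere in the module to the repository root:

# Hypothetical call: run parallel prediction with the settings in main_para.ini.
inference('main_para.ini')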
Example #2
def set_pythonpath(para_file):

    network_ini = parameters.get_string_parameters(para_file,
                                                   'network_setting_ini')
    mmseg_repo_dir = parameters.get_directory(network_ini, 'mmseg_repo_dir')
    mmseg_code_dir = osp.join(mmseg_repo_dir, 'mmseg')

    if os.path.isdir(mmseg_code_dir) is False:
        raise ValueError('%s does not exist' % mmseg_code_dir)

    # set PYTHONPATH to use my modified version of mmseg
    if os.getenv('PYTHONPATH'):
        os.environ['PYTHONPATH'] = os.getenv(
            'PYTHONPATH') + ':' + mmseg_code_dir
    else:
        os.environ['PYTHONPATH'] = mmseg_code_dir
    print('\nPYTHONPATH is: ', os.getenv('PYTHONPATH'))
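
Since the function mutates os.environ, the updated PYTHONPATH is inherited by child processes started afterwards (e.g., via os.system or subprocess), not by the already-running interpreter's sys.path. A minimal usage sketch with a hypothetical parameter file:

# Hypothetical call: do this before spawning training or prediction scripts.
set_pythonpath('main_para.ini')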
Example #3
def mmseg_train_main(para_file, gpu_num):
    print(datetime.now(), "train MMSegmentation")
    SECONDS = time.time()

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' %
                      (para_file, os.getcwd()))

    network_setting_ini = parameters.get_string_parameters(
        para_file, 'network_setting_ini')
    mmseg_repo_dir = parameters.get_directory(network_setting_ini,
                                              'mmseg_repo_dir')
    mmseg_config_dir = osp.join(mmseg_repo_dir, 'configs')
    if os.path.isdir(mmseg_config_dir) is False:
        raise ValueError('%s does not exist' % mmseg_config_dir)

    base_config_file = parameters.get_string_parameters(
        network_setting_ini, 'base_config')
    base_config_file = os.path.join(mmseg_config_dir, base_config_file)
    if os.path.isfile(base_config_file) is False:
        raise IOError('%s does not exist' % base_config_file)

    global open_mmlab_python
    open_mmlab_python = parameters.get_file_path_parameters(
        network_setting_ini, 'open-mmlab-python')

    WORK_DIR = os.getcwd()
    expr_name = parameters.get_string_parameters(para_file, 'expr_name')

    # copy the base_config_file, then save it to a new one
    config_file = osp.join(
        WORK_DIR,
        osp.basename(
            io_function.get_name_by_adding_tail(base_config_file, expr_name)))
    if updated_config_file(WORK_DIR, expr_name, base_config_file, config_file,
                           para_file, network_setting_ini, gpu_num) is False:
        raise ValueError('Getting the config file failed')

    train_evaluation_mmseg(WORK_DIR, mmseg_repo_dir, config_file, expr_name,
                           para_file, network_setting_ini, gpu_num)

    duration = time.time() - SECONDS
    os.system(
        'echo "$(date): time cost of training: %.2f seconds">>time_cost.txt' %
        duration)
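
A minimal call sketch; the file name and GPU count are hypothetical placeholders, and the parameter file must exist in the current working directory (the function raises IOError otherwise):

# Hypothetical call: train with the settings in main_para.ini on 2 GPUs.
mmseg_train_main('main_para.ini', 2)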
Example #4
def postProcess(para_file, inf_post_note, b_skip_getshp=False, test_id=None):
    # test_id is related to the training run

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' %
                      (para_file, os.getcwd()))

    # the test string in 'exe.sh'
    test_note = inf_post_note

    WORK_DIR = os.getcwd()

    SECONDS = time.time()

    expr_name = parameters.get_string_parameters(para_file, 'expr_name')
    network_setting_ini = parameters.get_string_parameters(
        para_file, 'network_setting_ini')

    inf_dir = parameters.get_directory(para_file, 'inf_output_dir')
    if test_id is None:
        test_id = os.path.basename(WORK_DIR) + '_' + expr_name

    # get names of inference areas
    multi_inf_regions = parameters.get_string_list_parameters(
        para_file, 'inference_regions')

    # run post-processing in parallel
    # max_parallel_postProc_task = 8

    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    io_function.mkdir(backup_dir)

    # loop through each inference region
    sub_tasks = []
    same_area_time_inis = group_same_area_time_observations(multi_inf_regions)
    region_eva_reports = {}
    for key in same_area_time_inis.keys():
        multi_observations = same_area_time_inis[key]
        area_name = parameters.get_string_parameters(
            multi_observations[0],
            'area_name')  # they have the same name and time
        area_time = parameters.get_string_parameters(multi_observations[0],
                                                     'area_time')
        merged_shp_list = []
        map_raster_list_2d = [None] * len(multi_observations)
        for area_idx, area_ini in enumerate(multi_observations):
            area_remark = parameters.get_string_parameters(
                area_ini, 'area_remark')
            area_save_dir, shp_pre, _ = get_observation_save_dir_shp_pre(
                inf_dir, area_name, area_time, area_remark, test_id)

            # get image list
            inf_image_dir = parameters.get_directory(area_ini, 'inf_image_dir')
            # it is OK to treat a file name as a pattern and pass it to the following functions to get the file list
            inf_image_or_pattern = parameters.get_string_parameters(
                area_ini, 'inf_image_or_pattern')
            inf_img_list = io_function.get_file_list_by_pattern(
                inf_image_dir, inf_image_or_pattern)
            img_count = len(inf_img_list)
            if img_count < 1:
                raise ValueError(
                    'No image for inference, please check inf_image_dir and inf_image_or_pattern in %s'
                    % area_ini)

            merged_shp = os.path.join(WORK_DIR, area_save_dir,
                                      shp_pre + '.shp')
            if b_skip_getshp:
                pass
            else:
                # post-process images one by one
                result_shp_list = []
                map_raster_list = []
                for img_idx, img_path in enumerate(inf_img_list):
                    out_shp, out_raster = inf_results_to_shapefile(
                        WORK_DIR, img_idx, area_save_dir, test_id)
                    if out_shp is None or out_raster is None:
                        continue
                    result_shp_list.append(os.path.join(WORK_DIR, out_shp))
                    map_raster_list.append(out_raster)
                # merge shapefiles
                if merge_shape_files(result_shp_list, merged_shp) is False:
                    continue
                map_raster_list_2d[area_idx] = map_raster_list

            merged_shp_list.append(merged_shp)

        if b_skip_getshp is False:
            # add occurrence to each polygon
            get_occurence_for_multi_observation(merged_shp_list)

        for area_idx, area_ini in enumerate(multi_observations):
            area_remark = parameters.get_string_parameters(
                area_ini, 'area_remark')
            area_save_dir, shp_pre, area_remark_time = get_observation_save_dir_shp_pre(
                inf_dir, area_name, area_time, area_remark, test_id)

            merged_shp = os.path.join(WORK_DIR, area_save_dir,
                                      shp_pre + '.shp')
            if os.path.isfile(merged_shp) is False:
                print('Warning, %s does not exist, skip' % merged_shp)
                continue

            # add attributes to shapefile
            # add_attributes_script = os.path.join(code_dir,'datasets', 'get_polygon_attributes.py')
            shp_attributes = os.path.join(WORK_DIR, area_save_dir,
                                          shp_pre + '_post_NOrm.shp')
            # add_polygon_attributes(add_attributes_script,merged_shp, shp_attributes, para_file, area_ini )
            add_polygon_attributes(merged_shp, shp_attributes, para_file,
                                   area_ini)

            # remove polygons
            # rm_polygon_script = os.path.join(code_dir,'datasets', 'remove_mappedPolygons.py')
            shp_post = os.path.join(WORK_DIR, area_save_dir,
                                    shp_pre + '_post.shp')
            # remove_polygons(rm_polygon_script,shp_attributes, shp_post, para_file)
            remove_polygons_main(shp_attributes, shp_post, para_file)

            # evaluate the mapping results
            # eval_shp_script = os.path.join(code_dir,'datasets', 'evaluation_result.py')
            out_report = os.path.join(WORK_DIR, area_save_dir,
                                      shp_pre + '_evaluation_report.txt')
            # evaluation_polygons(eval_shp_script, shp_post, para_file, area_ini,out_report)
            evaluation_polygons(shp_post, para_file, area_ini, out_report)

            ##### copy and backup files ######
            # copy files to result_backup
            if len(test_note) > 0:
                backup_dir_area = os.path.join(
                    backup_dir, area_name + '_' + area_remark_time + '_' +
                    test_id + '_' + test_note)
            else:
                backup_dir_area = os.path.join(
                    backup_dir,
                    area_name + '_' + area_remark_time + '_' + test_id)
            io_function.mkdir(backup_dir_area)
            if len(test_note) > 0:
                bak_merged_shp = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, test_note]) + '.shp')
                bak_post_shp = os.path.join(
                    backup_dir_area,
                    '_'.join([shp_pre, 'post', test_note]) + '.shp')
                bak_eva_report = os.path.join(
                    backup_dir_area,
                    '_'.join([shp_pre, 'eva_report', test_note]) + '.txt')
                bak_area_ini = os.path.join(
                    backup_dir_area,
                    '_'.join([shp_pre, 'region', test_note]) + '.ini')
            else:
                bak_merged_shp = os.path.join(backup_dir_area,
                                              '_'.join([shp_pre]) + '.shp')
                bak_post_shp = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, 'post']) + '.shp')
                bak_eva_report = os.path.join(
                    backup_dir_area,
                    '_'.join([shp_pre, 'eva_report']) + '.txt')
                bak_area_ini = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, 'region']) + '.ini')

            io_function.copy_shape_file(merged_shp, bak_merged_shp)
            io_function.copy_shape_file(shp_post, bak_post_shp)
            if os.path.isfile(out_report):
                io_function.copy_file_to_dst(out_report,
                                             bak_eva_report,
                                             overwrite=True)
            io_function.copy_file_to_dst(area_ini,
                                         bak_area_ini,
                                         overwrite=True)

            # copy map raster
            b_backup_map_raster = parameters.get_bool_parameters_None_if_absence(
                area_ini, 'b_backup_map_raster')
            if b_backup_map_raster is True:
                if map_raster_list_2d[area_idx] is not None:
                    for map_tif in map_raster_list_2d[area_idx]:
                        bak_map_tif = os.path.join(backup_dir_area,
                                                   os.path.basename(map_tif))
                        io_function.copy_file_to_dst(map_tif,
                                                     bak_map_tif,
                                                     overwrite=True)

            region_eva_reports[shp_pre] = bak_eva_report

    if len(test_note) > 0:
        bak_para_ini = os.path.join(
            backup_dir, '_'.join([test_id, 'para', test_note]) + '.ini')
        bak_network_ini = os.path.join(
            backup_dir, '_'.join([test_id, 'network', test_note]) + '.ini')
        bak_time_cost = os.path.join(
            backup_dir, '_'.join([test_id, 'time_cost', test_note]) + '.txt')
    else:
        bak_para_ini = os.path.join(backup_dir,
                                    '_'.join([test_id, 'para']) + '.ini')
        bak_network_ini = os.path.join(backup_dir,
                                       '_'.join([test_id, 'network']) + '.ini')
        bak_time_cost = os.path.join(backup_dir,
                                     '_'.join([test_id, 'time_cost']) + '.txt')
    io_function.copy_file_to_dst(para_file, bak_para_ini)
    io_function.copy_file_to_dst(network_setting_ini, bak_network_ini)
    if os.path.isfile('time_cost.txt'):
        io_function.copy_file_to_dst('time_cost.txt', bak_time_cost)

    # output the evaluation report to screen
    for key in region_eva_reports.keys():
        report = region_eva_reports[key]
        if os.path.isfile(report) is False:
            continue
        print('evaluation report for %s:' % key)
        os.system('head -n 7 %s' % report)

    # output evaluation report to table
    if len(test_note) > 0:
        out_table = os.path.join(
            backup_dir,
            '_'.join([test_id, 'accuracy_table', test_note]) + '.xlsx')
    else:
        out_table = os.path.join(
            backup_dir, '_'.join([test_id, 'accuracy_table']) + '.xlsx')
    eva_reports = [
        region_eva_reports[key] for key in region_eva_reports
        if os.path.isfile(region_eva_reports[key])
    ]
    eva_report_to_tables.eva_reports_to_table(eva_reports, out_table)

    duration = time.time() - SECONDS
    os.system(
        'echo "$(date): time cost of post-procesing: %.2f seconds">>time_cost.txt'
        % duration)
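
A minimal call sketch with hypothetical arguments; when test_id is None it defaults to <current-folder-name>_<expr_name>, and a non-empty inf_post_note is appended to the names of backed-up files:

# Hypothetical call: post-process and back up results, tagged with 'exp1'.
postProcess('main_para.ini', 'exp1', b_skip_getshp=False, test_id=None)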
Example #5
def main(options, args):

    print(
        "%s : prediction using the trained model (run in parallel if using multiple GPUs)"
        % os.path.basename(sys.argv[0]))
    machine_name = os.uname()[1]
    start_time = datetime.datetime.now()

    para_file = args[0]
    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' %
                      (para_file, os.getcwd()))

    basic.setlogfile('parallel_predict_Log.txt')

    deeplab_inf_script = os.path.join(code_dir, 'deeplabBased',
                                      'deeplab_inference.py')
    network_setting_ini = parameters.get_string_parameters(
        para_file, 'network_setting_ini')

    global tf1x_python
    tf1x_python = parameters.get_file_path_parameters(network_setting_ini,
                                                      'tf1x_python')

    trained_model = options.trained_model

    outdir = parameters.get_directory(para_file, 'inf_output_dir')

    # remove previous results (let the user remove this folder manually or in the exe.sh script)
    io_function.mkdir(outdir)

    # get names of inference areas
    multi_inf_regions = parameters.get_string_list_parameters(
        para_file, 'inference_regions')

    # max_parallel_inf_task = parameters.get_digit_parameters(para_file,'max_parallel_inf_task','int')

    b_use_multiGPUs = parameters.get_bool_parameters(para_file,
                                                     'b_use_multiGPUs')

    # loop through each inference region
    sub_tasks = []
    for area_idx, area_ini in enumerate(multi_inf_regions):

        area_name = parameters.get_string_parameters(area_ini, 'area_name')
        area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
        area_time = parameters.get_string_parameters(area_ini, 'area_time')

        inf_image_dir = parameters.get_directory(area_ini, 'inf_image_dir')

        # it is OK to treat a file name as a pattern and pass it to the following functions to get the file list
        inf_image_or_pattern = parameters.get_string_parameters(
            area_ini, 'inf_image_or_pattern')

        inf_img_list = io_function.get_file_list_by_pattern(
            inf_image_dir, inf_image_or_pattern)
        img_count = len(inf_img_list)
        if img_count < 1:
            raise ValueError(
                'No image for inference, please check inf_image_dir and inf_image_or_pattern in %s'
                % area_ini)

        area_save_dir = os.path.join(
            outdir, area_name + '_' + area_remark + '_' + area_time)
        io_function.mkdir(area_save_dir)

        # run inference on the images of this area in parallel
        CUDA_VISIBLE_DEVICES = []
        if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
            CUDA_VISIBLE_DEVICES = [
                int(item.strip())
                for item in os.environ['CUDA_VISIBLE_DEVICES'].split(',')
            ]
        idx = 0
        while idx < img_count:

            if b_use_multiGPUs:
                # get available GPUs  # https://github.com/anderskm/gputil
                deviceIDs = GPUtil.getAvailable(order='first',
                                                limit=100,
                                                maxLoad=0.5,
                                                maxMemory=0.5,
                                                includeNan=False,
                                                excludeID=[],
                                                excludeUUID=[])
                # only use the ones in CUDA_VISIBLE_DEVICES
                if len(CUDA_VISIBLE_DEVICES) > 0:
                    deviceIDs = [
                        item for item in deviceIDs
                        if item in CUDA_VISIBLE_DEVICES
                    ]
                    basic.outputlogMessage('on ' + machine_name +
                                           ', available GPUs:' +
                                           str(deviceIDs) +
                                           ', among visible ones:' +
                                           str(CUDA_VISIBLE_DEVICES))
                else:
                    basic.outputlogMessage('on ' + machine_name +
                                           ', available GPUs:' +
                                           str(deviceIDs))

                if len(deviceIDs) < 1:
                    time.sleep(
                        60
                    )  # wait one minute, then check the available GPUs again
                    continue
                # use only the first available visible GPU
                gpuid = deviceIDs[0]
                basic.outputlogMessage(
                    '%d: predict image %s on GPU %d of %s' %
                    (idx, inf_img_list[idx], gpuid, machine_name))
            else:
                gpuid = None
                basic.outputlogMessage('%d: predict image %s on %s' %
                                       (idx, inf_img_list[idx], machine_name))

            # run inference
            img_save_dir = os.path.join(area_save_dir, 'I%d' % idx)
            inf_list_file = os.path.join(area_save_dir, '%d.txt' % idx)

            # if it already exists, then skip
            if os.path.isdir(img_save_dir) and is_file_exist_in_folder(
                    img_save_dir):
                basic.outputlogMessage(
                    'folder of %dth image (%s) already exists, '
                    'it has been predicted or is being predicted' %
                    (idx, inf_img_list[idx]))
                idx += 1
                continue

            with open(inf_list_file, 'w') as inf_obj:
                inf_obj.writelines(inf_img_list[idx] + '\n')

            sub_process = Process(target=predict_one_image_deeplab,
                                  args=(deeplab_inf_script, para_file,
                                        network_setting_ini, img_save_dir,
                                        inf_list_file, gpuid, trained_model))
            sub_process.start()
            sub_tasks.append(sub_process)

            if b_use_multiGPUs is False:
                # wait until previous one finished
                while sub_process.is_alive():
                    time.sleep(5)

            idx += 1

            # wait until predicted image patches exist or 20 minutes have elapsed
            time0 = time.time()
            elapsed_time = time.time() - time0
            while elapsed_time < 20 * 60:
                elapsed_time = time.time() - time0
                file_exist = is_file_exist_in_folder(img_save_dir)
                if file_exist is True or sub_process.is_alive() is False:
                    break
                else:
                    time.sleep(5)

            if sub_process.exitcode is not None and sub_process.exitcode != 0:
                sys.exit(1)

            # if 'chpc' in machine_name:
            #     time.sleep(60)  # wait 60 second on ITSC services
            # else:
            #     time.sleep(10)

    # wait until all tasks have finished
    while b_all_task_finish(sub_tasks) is False:
        basic.outputlogMessage('wait all tasks to finish')
        time.sleep(60)

    end_time = datetime.datetime.now()

    diff_time = end_time - start_time
    out_str = "%s: time cost of total parallel inference on %s: %d seconds" % (
        str(end_time), machine_name, diff_time.seconds)
    basic.outputlogMessage(out_str)
    with open("time_cost.txt", 'a') as t_obj:
        t_obj.writelines(out_str + '\n')
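
The GPU-polling pattern above (and in Example #6) can be factored into a small helper. A sketch built on the GPUtil package (https://github.com/anderskm/gputil), using the same 50% load and memory thresholds; wait_for_free_gpu is a hypothetical name:

import time
import GPUtil

def wait_for_free_gpu(visible_ids, check_interval=60):
    # poll until a GPU with <50% load and <50% memory usage is free
    while True:
        device_ids = GPUtil.getAvailable(order='first', limit=100,
                                         maxLoad=0.5, maxMemory=0.5)
        if visible_ids:  # restrict to CUDA_VISIBLE_DEVICES if it was set
            device_ids = [d for d in device_ids if d in visible_ids]
        if device_ids:
            return device_ids[0]
        time.sleep(check_interval)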
Example #6
def mmseg_parallel_predict_main(para_file, trained_model):

    print(
        "MMSegmentation prediction using the trained model (run in parallel if using multiple GPUs)"
    )
    machine_name = os.uname()[1]
    start_time = datetime.now()

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' %
                      (para_file, os.getcwd()))

    expr_name = parameters.get_string_parameters(para_file, 'expr_name')
    # network_ini = parameters.get_string_parameters(para_file, 'network_setting_ini')
    # mmseg_repo_dir = parameters.get_directory(network_ini, 'mmseg_repo_dir')
    # mmseg_code_dir = osp.join(mmseg_repo_dir,'mmseg')

    # if os.path.isdir(mmseg_code_dir) is False:
    #     raise ValueError('%s does not exist' % mmseg_code_dir)

    # # set PYTHONPATH to use my modified version of mmseg
    # if os.getenv('PYTHONPATH'):
    #     os.environ['PYTHONPATH'] = os.getenv('PYTHONPATH') + ':' + mmseg_code_dir
    # else:
    #     os.environ['PYTHONPATH'] = mmseg_code_dir
    # print('\nPYTHONPATH is: ',os.getenv('PYTHONPATH'))

    if trained_model is None:
        trained_model = os.path.join(expr_name, 'latest.pth')

    outdir = parameters.get_directory(para_file, 'inf_output_dir')
    # remove previous results (let the user remove this folder manually or in the exe.sh script)
    io_function.mkdir(outdir)

    # get names of inference areas
    multi_inf_regions = parameters.get_string_list_parameters(
        para_file, 'inference_regions')
    b_use_multiGPUs = parameters.get_bool_parameters(para_file,
                                                     'b_use_multiGPUs')

    # loop through each inference region
    sub_tasks = []
    for area_idx, area_ini in enumerate(multi_inf_regions):

        area_name = parameters.get_string_parameters(area_ini, 'area_name')
        area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
        area_time = parameters.get_string_parameters(area_ini, 'area_time')

        inf_image_dir = parameters.get_directory(area_ini, 'inf_image_dir')

        # it is OK to treat a file name as a pattern and pass it to the following functions to get the file list
        inf_image_or_pattern = parameters.get_string_parameters(
            area_ini, 'inf_image_or_pattern')

        inf_img_list = io_function.get_file_list_by_pattern(
            inf_image_dir, inf_image_or_pattern)
        img_count = len(inf_img_list)
        if img_count < 1:
            raise ValueError(
                'No image for inference, please check inf_image_dir and inf_image_or_pattern in %s'
                % area_ini)

        area_save_dir = os.path.join(
            outdir, area_name + '_' + area_remark + '_' + area_time)
        io_function.mkdir(area_save_dir)

        # run inference on the images of this area in parallel
        CUDA_VISIBLE_DEVICES = []
        if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
            CUDA_VISIBLE_DEVICES = [
                int(item.strip())
                for item in os.environ['CUDA_VISIBLE_DEVICES'].split(',')
            ]
        idx = 0
        while idx < img_count:

            if b_use_multiGPUs:
                # get available GPUs  # https://github.com/anderskm/gputil
                # memory: orders the available GPU device ids by ascending memory usage
                deviceIDs = GPUtil.getAvailable(order='memory',
                                                limit=100,
                                                maxLoad=0.5,
                                                maxMemory=0.5,
                                                includeNan=False,
                                                excludeID=[],
                                                excludeUUID=[])
                # only use the ones in CUDA_VISIBLE_DEVICES
                if len(CUDA_VISIBLE_DEVICES) > 0:
                    deviceIDs = [
                        item for item in deviceIDs
                        if item in CUDA_VISIBLE_DEVICES
                    ]
                    basic.outputlogMessage('on ' + machine_name +
                                           ', available GPUs:' +
                                           str(deviceIDs) +
                                           ', among visible ones:' +
                                           str(CUDA_VISIBLE_DEVICES))
                else:
                    basic.outputlogMessage('on ' + machine_name +
                                           ', available GPUs:' +
                                           str(deviceIDs))

                if len(deviceIDs) < 1:
                    time.sleep(
                        60
                    )  # wait 60 seconds (mmseg needs a longer time to load models), then check the available GPUs again
                    continue
                # use only the first available visible GPU
                gpuid = deviceIDs[0]
                basic.outputlogMessage(
                    '%d: predict image %s on GPU %d of %s' %
                    (idx, inf_img_list[idx], gpuid, machine_name))
            else:
                gpuid = None
                basic.outputlogMessage('%d: predict image %s on %s' %
                                       (idx, inf_img_list[idx], machine_name))

            # run inference
            img_save_dir = os.path.join(area_save_dir, 'I%d' % idx)
            inf_list_file = os.path.join(area_save_dir, '%d.txt' % idx)

            done_indicator = '%s_done' % inf_list_file
            if os.path.isfile(done_indicator):
                basic.outputlogMessage('warning, %s exists, skip prediction' %
                                       done_indicator)
                idx += 1
                continue

            # if it already exists, then skip
            if os.path.isdir(img_save_dir) and is_file_exist_in_folder(
                    img_save_dir):
                basic.outputlogMessage(
                    'folder of %dth image (%s) already exists, '
                    'it has been predicted or is being predicted' %
                    (idx, inf_img_list[idx]))
                idx += 1
                continue

            with open(inf_list_file, 'w') as inf_obj:
                inf_obj.writelines(inf_img_list[idx] + '\n')

            sub_process = Process(target=predict_one_image_mmseg,
                                  args=(para_file, inf_img_list[idx],
                                        img_save_dir, inf_list_file, gpuid,
                                        trained_model))
            sub_process.start()
            sub_tasks.append(sub_process)

            if b_use_multiGPUs is False:
                # wait until previous one finished
                while sub_process.is_alive():
                    time.sleep(1)

            idx += 1

            # wait until predicted image patches exist or 20 minutes have elapsed
            time0 = time.time()
            elapsed_time = time.time() - time0
            while elapsed_time < 20 * 60:
                elapsed_time = time.time() - time0
                file_exist = os.path.isdir(
                    img_save_dir) and is_file_exist_in_folder(img_save_dir)
                if file_exist is True or sub_process.is_alive() is False:
                    break
                else:
                    time.sleep(1)

            if sub_process.exitcode is not None and sub_process.exitcode != 0:
                sys.exit(1)

            basic.close_remove_completed_process(sub_tasks)
            # if 'chpc' in machine_name:
            #     time.sleep(60)  # wait 60 second on ITSC services
            # else:
            #     time.sleep(10)

    # wait until all tasks have finished
    wait_all_finish = 0
    while basic.b_all_process_finish(sub_tasks) is False:
        if wait_all_finish % 100 == 0:
            basic.outputlogMessage('wait all tasks to finish')
        time.sleep(1)
        wait_all_finish += 1

    basic.close_remove_completed_process(sub_tasks)
    end_time = datetime.now()

    diff_time = end_time - start_time
    out_str = "%s: time cost of total parallel inference on %s: %d seconds" % (
        str(end_time), machine_name, diff_time.seconds)
    basic.outputlogMessage(out_str)
    with open("time_cost.txt", 'a') as t_obj:
        t_obj.writelines(out_str + '\n')
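
A minimal call sketch with a hypothetical parameter file; passing None for trained_model falls back to <expr_name>/latest.pth, as shown at the top of the function:

# Hypothetical call: predict with the latest checkpoint of the experiment.
mmseg_parallel_predict_main('main_para.ini', None)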
Example #7
def image_translate_train_generate_main(para_file, gpu_num):
    '''
    Apply a GAN to translate images from the source domain to the target domain.

    Existing sub-images (with sub-labels) are images in the source domain.
    Images used for inference have no training data, so each of them can be
    considered as being in the target domain.
    '''
    print(datetime.now(), "image translation (train and generate) using GAN")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s not exists in current folder: %s' %
                      (para_file, os.getcwd()))

    gan_para_file = parameters.get_string_parameters_None_if_absence(
        para_file, 'regions_n_setting_image_translation_ini')
    if gan_para_file is None:
        print(
            'regions_n_setting_image_translation_ini is not set, skip image translation using GAN'
        )
        return None
    gan_para_file = os.path.abspath(
        gan_para_file
    )  # change to an absolute path, because we change folders later
    training_regions = parameters.get_string_list_parameters(
        para_file, 'training_regions')

    machine_name = os.uname()[1]
    SECONDS = time.time()

    # get regions (equal to or a subset of the inference regions) that need image translation
    multi_gan_regions = parameters.get_string_list_parameters(
        gan_para_file, 'regions_need_image_translation')
    multi_gan_source_regions = parameters.get_string_list_parameters(
        gan_para_file, 'source_domain_regions')
    # check target domain
    if len(multi_gan_source_regions) != len(multi_gan_regions):
        raise ValueError(
            'the numbers of source domain and target domain regions are different')
    if set(multi_gan_source_regions).issubset(training_regions) is False:
        raise ValueError(
            'the source domain regions are not a subset of the training regions')
    for area_idx, (area_gan_ini, area_src_ini) in enumerate(
            zip(multi_gan_regions, multi_gan_source_regions)):
        basic.outputlogMessage('%d: source and target area: %s vs %s' %
                               (area_idx, area_src_ini, area_gan_ini))

    gan_working_dir = parameters.get_string_parameters(gan_para_file,
                                                       'working_root')
    # gan_dir_pre_name = parameters.get_string_parameters(gan_para_file, 'gan_dir_pre_name')
    # use GAN model name as the gan_dir_pre_name
    gan_model = parameters.get_string_parameters(gan_para_file, 'gan_model')
    gan_dir_pre_name = gan_model  # '_' +

    # loop through each region that needs image translation
    sub_tasks = []
    for area_idx, (area_gan_ini, area_src_ini) in enumerate(
            zip(multi_gan_regions, multi_gan_source_regions)):

        area_ini = os.path.abspath(area_gan_ini)
        area_src_ini = os.path.abspath(area_src_ini)
        area_name = parameters.get_string_parameters(area_ini, 'area_name')
        area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
        area_time = parameters.get_string_parameters(area_ini, 'area_time')

        inf_image_dir = parameters.get_directory(area_ini, 'inf_image_dir')

        # it is OK to treat a file name as a pattern and pass it to the following functions to get the file list
        inf_image_or_pattern = parameters.get_string_parameters(
            area_ini, 'inf_image_or_pattern')

        inf_img_list = io_function.get_file_list_by_pattern(
            inf_image_dir, inf_image_or_pattern)
        img_count = len(inf_img_list)
        if img_count < 1:
            raise ValueError(
                'No image for image translation, please check inf_image_dir and inf_image_or_pattern in %s'
                % area_ini)

        gan_project_save_dir = get_gan_project_save_dir(
            gan_working_dir, gan_dir_pre_name, area_name, area_remark,
            area_time, area_src_ini)

        if os.path.isdir(gan_project_save_dir):
            if generate_image_exists(gan_project_save_dir) is True:
                basic.outputlogMessage(
                    'generated new images (generate.txt_done) already exist for %s, skip'
                    % gan_project_save_dir)
                continue
        else:
            io_function.mkdir(gan_project_save_dir)

        # run image translation for this area in parallel
        CUDA_VISIBLE_DEVICES = []
        if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
            CUDA_VISIBLE_DEVICES = [
                int(item.strip())
                for item in os.environ['CUDA_VISIBLE_DEVICES'].split(',')
            ]

        # get a valid GPU
        gpuids = []
        while len(gpuids) < 1:
            # get available GPUs  # https://github.com/anderskm/gputil
            deviceIDs = GPUtil.getAvailable(order='first',
                                            limit=100,
                                            maxLoad=0.5,
                                            maxMemory=0.5,
                                            includeNan=False,
                                            excludeID=[],
                                            excludeUUID=[])
            # only use the ones in CUDA_VISIBLE_DEVICES
            if len(CUDA_VISIBLE_DEVICES) > 0:
                deviceIDs = [
                    item for item in deviceIDs if item in CUDA_VISIBLE_DEVICES
                ]
                basic.outputlogMessage('on ' + machine_name +
                                       ', available GPUs:' + str(deviceIDs) +
                                       ', among visible ones:' +
                                       str(CUDA_VISIBLE_DEVICES))
            else:
                basic.outputlogMessage('on ' + machine_name +
                                       ', available GPUs:' + str(deviceIDs))

            if len(deviceIDs) < 1:
                print(datetime.now(),
                      'No available GPUs, will check again in 60 seconds')
                time.sleep(
                    60)  # wait one minute, then check the available GPUs again
                continue
            # use only the first available visible GPU
            gpuids.append(deviceIDs[0])
            basic.outputlogMessage(
                '%d:image translation for  %s on GPU %s of %s' %
                (area_idx, area_ini, str(gpuids), machine_name))

        # run image translation
        # PyTorch indexes the GPUs in CUDA_VISIBLE_DEVICES starting from zero, so re-index the GPU ids
        if len(CUDA_VISIBLE_DEVICES) > 0:
            gpuids = [CUDA_VISIBLE_DEVICES.index(id) for id in gpuids]

        sub_process = Process(target=image_translate_train_generate_one_domain,
                              args=(gan_project_save_dir, gan_para_file,
                                    area_src_ini, area_ini, gpuids,
                                    inf_img_list))

        sub_process.start()
        sub_tasks.append(sub_process)

        # wait until image translation has started or 20 minutes have elapsed
        time0 = time.time()
        elapsed_time = time.time() - time0
        while elapsed_time < 20 * 60:
            elapsed_time = time.time() - time0
            if CUT_gan_is_ready_to_train(
                    gan_project_save_dir) is True or sub_process.is_alive(
                    ) is False:
                break
            else:
                time.sleep(5)

        time.sleep(
            10
        )  # wait, allowing time for the GAN process to start (and possibly fail)

        if sub_process.exitcode is not None and sub_process.exitcode != 0:
            sys.exit(1)

        basic.close_remove_completed_process(sub_tasks)

    # wait until all tasks have finished
    while basic.b_all_process_finish(sub_tasks) is False:
        basic.outputlogMessage('wait all tasks to finish')
        time.sleep(60)
    basic.check_exitcode_of_process(sub_tasks)
    basic.close_remove_completed_process(sub_tasks)

    save_image_dir = parameters.get_string_parameters(para_file,
                                                      'input_train_dir')
    save_label_dir = parameters.get_string_parameters(para_file,
                                                      'input_label_dir')
    merge_subImages_from_gan(multi_gan_source_regions, multi_gan_regions,
                             gan_working_dir, gan_dir_pre_name, save_image_dir,
                             save_label_dir)

    duration = time.time() - SECONDS
    os.system(
        'echo "$(date): time cost of translating sub images to target domains: %.2f seconds">>time_cost.txt'
        % duration)
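
A minimal call sketch with hypothetical arguments; the function returns None early if regions_n_setting_image_translation_ini is absent from the parameter file:

# Hypothetical call: train CUT and translate sub-images into each target domain.
image_translate_train_generate_main('main_para.ini', 1)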
Example #8
def image_translate_train_generate_one_domain(gan_working_dir, gan_para_file,
                                              area_src_ini, area_gan_ini,
                                              gpu_ids, domainB_imgList):

    current_dir = os.getcwd()

    # get original sub-images
    _, _, area_ini_sub_images_labels_dict = original_sub_images_labels_list_before_gan(
    )
    sub_img_label_txt = os.path.join(current_dir,
                                     area_ini_sub_images_labels_dict)
    if os.path.isfile(sub_img_label_txt) is False:
        raise IOError(
            '%s not in the current folder, please get subImages first' %
            sub_img_label_txt)

    # prepare image list of domain A
    # what if the sizes of some images do not fit the CUT input?
    domain_A_images = []
    # domain_A_labels = []
    # with open(sub_img_label_txt) as txt_obj:
    #     line_list = [name.strip() for name in txt_obj.readlines()]
    #     for line in line_list:
    #         sub_image, sub_label = line.split(':')
    #         domain_A_images.append(os.path.join(current_dir,sub_image))
    #         # domain_A_labels.append(os.path.join(current_dir,sub_label))

    area_ini_sub_images_labels = io_function.read_dict_from_txt_json(
        'area_ini_sub_images_labels.txt')
    for line in area_ini_sub_images_labels[os.path.basename(area_src_ini)]:
        sub_image, sub_label = line.split(':')
        domain_A_images.append(os.path.join(current_dir, sub_image))
        # domain_A_labels.append(os.path.join(current_dir,sub_label))

    os.chdir(gan_working_dir)

    io_function.save_list_to_txt('image_A_list.txt', domain_A_images)

    # read target images, which will be considered as the target domain
    # what if there are too many images in domain B?
    io_function.save_list_to_txt('image_B_list.txt', domainB_imgList)

    gan_python = parameters.get_file_path_parameters(gan_para_file, 'python')
    cut_dir = parameters.get_directory(gan_para_file, 'gan_script_dir')
    train_script = os.path.join(cut_dir, 'train.py')
    generate_script = os.path.join(cut_dir, 'generate_image.py')
    # training of CUT
    if train_CUT_gan(gan_python, train_script, gan_para_file,
                     gpu_ids) is False:
        os.chdir(current_dir)
        return False

    # generate images using CUT, converting images in domain A to domain B
    save_tran_img_folder = 'subImages_translate'
    if generate_image_CUT(gan_python, generate_script, gan_para_file, gpu_ids,
                          domain_A_images, save_tran_img_folder) is False:
        os.chdir(current_dir)
        return False

    # change working directory back
    os.chdir(current_dir)
    return True
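
As Example #7 shows, this worker is meant to run in its own process; a minimal launch sketch where every argument value is a hypothetical placeholder:

from multiprocessing import Process

# Hypothetical launch; in practice the arguments are derived as in Example #7.
sub_process = Process(target=image_translate_train_generate_one_domain,
                      args=('gan_work/CUT_areaA', 'gan_para.ini',
                            'areaA_src.ini', 'areaA_gan.ini', [0],
                            ['target_img1.tif']))
sub_process.start()
sub_process.join()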