Example 1
def submit_hillshade_newest_headwall_line_grid_job(ids_list, idx,
                                                   grid_base_name,
                                                   max_job_count):

    wait_if_reach_max_jobs(max_job_count, 'dLi')  # draw Line on hillshade

    job_name = 'dLi%d' % idx
    check_length_jobname(job_name)
    work_dir = working_dir_string(idx,
                                  'hillshade_newest_headwall_line_',
                                  root=root_dir)
    if not os.path.isdir(work_dir):
        io_function.mkdir(work_dir)
        os.chdir(work_dir)

        ids_list = [str(item) for item in ids_list]
        io_function.save_list_to_txt(grid_base_name + '.txt', ids_list)

        # prepare job
        sh_list = [
            'hillshade_headwall_line_grid.sh',
            'job_hillshade_headwall_line_grid.sh'
        ]
        copy_curc_job_files(jobsh_dir, work_dir, sh_list)
        slurm_utility.modify_slurm_job_sh(
            'job_hillshade_headwall_line_grid.sh', 'job-name', job_name)
    else:
        os.chdir(work_dir)
        submit_job_names = slurm_utility.get_submited_job_names(curc_username)
        if job_name in submit_job_names:
            print('The folder %s already exists and the job has been submitted, '
                  'skipping submission of a new job' % work_dir)
            return

        # job is completed
        if os.path.isfile('done.txt'):
            print('The job in the folder: %s is Done' % work_dir)
            return

    # submit the job
    # sometimes submitting a job fails with "singularity: command not found" and exits,
    # which is odd; in that case, try submitting the job from an scompile node
    submit_job_curc_or_run_script_local('job_hillshade_headwall_line_grid.sh',
                                        'hillshade_headwall_line_grid.sh')

    os.chdir(curr_dir_before_start)
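
The helper wait_if_reach_max_jobs is not shown in these examples. Below is a minimal sketch of what such a throttle might look like, assuming jobs are identified by a job-name prefix and counted with squeue; the user-name argument and 60-second poll interval are placeholders, not taken from the original code.

import subprocess
import time

def wait_if_reach_max_jobs(max_job_count, prefix, user='curc_username'):
    # block until the number of queued or running jobs whose names start
    # with `prefix` drops below max_job_count
    while True:
        # squeue -u USER -h -o %j prints one job name per line, without a header
        out = subprocess.run(['squeue', '-u', user, '-h', '-o', '%j'],
                             capture_output=True, text=True, check=True).stdout
        job_count = sum(1 for name in out.splitlines() if name.startswith(prefix))
        if job_count < max_job_count:
            return
        print('%d "%s*" jobs in the queue (max %d), waiting ...'
              % (job_count, prefix, max_job_count))
        time.sleep(60)

Example 1 passes the prefix 'dLi', so only the hillshade-line jobs count toward the limit.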
Example 2
def submit_extract_headwall_job(slope_tifs, idx, max_job_count):

    wait_if_reach_max_jobs(max_job_count, 'HW')

    job_name = 'HW%d' % idx
    check_length_jobname(job_name)
    work_dir = working_dir_string(idx, 'extract_headwall_', root=root_dir)
    if not os.path.isdir(work_dir):
        io_function.mkdir(work_dir)
        os.chdir(work_dir)

        io_function.save_list_to_txt('slope_tif_list.txt', slope_tifs)

        # prepare job scripts for extracting headwalls from slope files
        sh_list = ['job_healwall.sh', 'extract_headwall_from_slope.sh']
        copy_curc_job_files(jobsh_dir, work_dir, sh_list)
        slurm_utility.modify_slurm_job_sh('job_healwall.sh', 'job-name',
                                          job_name)

    else:
        os.chdir(work_dir)

        # job is completed
        if os.path.isfile('done.txt'):
            print('The job in the folder: %s is Done' % work_dir)
            return

        submit_job_names = slurm_utility.get_submited_job_names(curc_username)
        if job_name in submit_job_names:
            print('The folder %s already exists and the job has been submitted, '
                  'skipping submission of a new job' % work_dir)
            return

    # submit the job
    # sometimes submitting a job fails with "singularity: command not found" and exits,
    # which is odd; in that case, try submitting the job from an scompile node
    submit_job_curc_or_run_script_local('job_healwall.sh',
                                        'extract_headwall_from_slope.sh')

    os.chdir(curr_dir_before_start)

    return
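
slurm_utility.modify_slurm_job_sh belongs to the author's utility library and is not shown here. A minimal sketch of the idea, assuming the copied job script contains a standard '#SBATCH --job-name=' directive (the function name set_slurm_job_name is hypothetical):

def set_slurm_job_name(job_sh_path, job_name):
    # rewrite the '#SBATCH --job-name=' line of a SLURM job script in place
    with open(job_sh_path, 'r') as f:
        lines = f.readlines()
    with open(job_sh_path, 'w') as f:
        for line in lines:
            if line.startswith('#SBATCH --job-name'):
                f.write('#SBATCH --job-name=%s\n' % job_name)
            else:
                f.write(line)

# usage, mirroring Example 2:
# set_slurm_job_name('job_healwall.sh', 'HW3')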
Example 3
def submit_training_job(idx, lr, iter_num, batch_size, backbone, buffer_size,
                        training_data_per, data_augmentation,
                        data_aug_ignore_classes):

    while True:
        job_count = slurm_utility.get_submit_job_count(curc_username)
        if job_count >= 5:
            print(machine_name, datetime.now(),
                  'You have submitted 5 or more jobs, waiting')
            time.sleep(60)  # wait one minute before checking again
            continue
        break

    para_file = 'main_para_exp9.ini'
    job_name = 'tune%d' % idx
    work_dir = working_dir_string(idx, root=root_dir)
    if not os.path.isdir(work_dir):
        io_function.mkdir(work_dir)
        os.chdir(work_dir)

        # set up the training folder: copy the ini files into it
        copy_ini_files(ini_dir, work_dir, para_file, area_ini_list, backbone)

        # change para_file
        modify_parameter(os.path.join(work_dir, para_file),
                         'network_setting_ini', backbone)
        modify_parameter(os.path.join(work_dir, backbone),
                         'base_learning_rate', lr)
        modify_parameter(os.path.join(work_dir, backbone), 'batch_size',
                         batch_size)
        modify_parameter(os.path.join(work_dir, backbone), 'iteration_num',
                         iter_num)

        modify_parameter(os.path.join(work_dir, para_file), 'buffer_size',
                         buffer_size)
        modify_parameter(os.path.join(work_dir, para_file),
                         'training_data_per', training_data_per)
        modify_parameter(os.path.join(work_dir, para_file),
                         'data_augmentation', data_augmentation)
        modify_parameter(os.path.join(work_dir, para_file),
                         'data_aug_ignore_classes', data_aug_ignore_classes)

        # run training
        # whole_procedure.run_whole_procedure(para_file, b_train_only=True)
        # copy job.sh, exe.sh and the other scripts, then submit the job
        copy_curc_job_files(jobsh_dir, work_dir)
        slurm_utility.modify_slurm_job_sh('job_tf_GPU.sh', 'job-name',
                                          job_name)

    else:
        os.chdir(work_dir)

        submit_job_names = slurm_utility.get_submited_job_names(curc_username)
        if job_name in submit_job_names:
            print('The folder %s already exists and the job has been submitted, '
                  'skipping submission of a new job' % work_dir)
            return work_dir, os.path.join(work_dir, para_file)

        # if a result already exists, the model is either fully trained or stopped early
        early_stop, model_trained_iter = check_early_stopping_trained_iteration(
            work_dir, para_file)
        if early_stop:
            print('The folder %s stopped early with a model trained for %d iterations, '
                  'skipping submission of a new job' % (work_dir, model_trained_iter))
            return work_dir, os.path.join(work_dir, para_file)
        if model_trained_iter >= iter_num:
            print('The folder %s already has a model trained for %d iterations (>= required), '
                  'skipping submission of a new job' % (work_dir, model_trained_iter))
            return work_dir, os.path.join(work_dir, para_file)

    # submit the job
    # sometimes submitting a job fails with "singularity: command not found" and exits,
    # which is odd; in that case, try submitting the job from an scompile node
    res = os.system('sbatch job_tf_GPU.sh')
    if res != 0:
        sys.exit(1)

    os.chdir(curr_dir_before_start)

    return work_dir, os.path.join(work_dir, para_file)
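
Example 3 submits with os.system('sbatch job_tf_GPU.sh') and exits on any non-zero status. A hedged alternative using subprocess, which also captures the job ID from the "Submitted batch job <id>" line that sbatch prints (the function name submit_sbatch is hypothetical, not part of the original code):

import subprocess
import sys

def submit_sbatch(job_sh='job_tf_GPU.sh'):
    # sbatch prints a line like: "Submitted batch job 1234567"
    proc = subprocess.run(['sbatch', job_sh], capture_output=True, text=True)
    if proc.returncode != 0:
        print('sbatch failed:', proc.stderr.strip())
        sys.exit(1)
    job_id = proc.stdout.strip().split()[-1]
    print('submitted %s as SLURM job %s' % (job_sh, job_id))
    return job_id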