Example no. 1
0
def test_population_average():
    """
    Run lama on the population-average test config.

    lama takes only one argument, the config file, so each config under
    test is simply handed to ``run_lama.run``.
    """
    cfg_path = Path(population_test_dir) / 'population_average_config.toml'
    run_lama.run(cfg_path)
Example no. 2
0
def main():
    """
    Console-script entry point for the LAMA registration pipeline.

    setuptools ``console_scripts`` requires a zero-argument callable to
    allow command-line running, so this wrapper parses the arguments
    itself and forwards the config path to lama.
    """
    parser = argparse.ArgumentParser("The LAMA registration pipeline")
    parser.add_argument('-c',
                        dest='config',
                        required=True,
                        help='Config file (YAML format)')
    opts = parser.parse_args()
    run_lama.run(Path(opts.config))
Example no. 3
0
def test_lama_job_runner():
    """
    Exercise the lama job runner, which was built to utilise multiple
    machines or the grid; this test drives it from a single machine.

    test_make_jobs_file() should run before this to create a jobs file
    that can be consumed, and this test must run before the stats tests
    because it produces the data they consume.

    NOTE: keep this test at the bottom of the file so it runs last; its
    outputs are consumed by the stats test.
    """
    cfg = test_data_root / 'debugging' / 'debug.toml'
    run_lama.run(cfg)
    assert True
Example no. 4
0
def lama_job_runner(config_path: Path,
                    root_directory: Path,
                    make_job_file: bool = False):
    """
    Run lama jobs from a shared job file, claiming one specimen at a time.

    Parameters
    ----------
    config_path:
        path to registration config file:
    root_directory
        path to root directory. The folder names from job_file.dir will be appending to this path to resolve project directories
    make_job_file
        if true, just make the job_file that other instances can consume

    Notes
    -----
    This function uses a SoftFileLock for locking the job_file csv to prevent multiple instances of this code from
    processing the same line or specimen. A SoftFileLock works by creating a lock file, and the presence of this file
    prevents other instances from accessing it. We don't use FileLock (atlhough this is more robust) as it's not
    supported on nfs file systems. The advantage of SoftFileLock is you can create a lock file manually if
    you want to edit a job file manually while job_runner is running (make sure to delete after editing).

    If this script terminates unexpectedly while it has a lock on the file, it will not be released and the file
    remains. Therefore before running this script, ensure no previous lock file is hanging around.
    """

    if not config_path.is_file():
        raise FileNotFoundError(f"can't find config file {config_path}")

    root_directory = root_directory.resolve()

    job_file = root_directory / JOBFILE_NAME
    lock_file = job_file.with_suffix('.lock')
    lock = SoftFileLock(lock_file)

    HN = socket.gethostname()

    if make_job_file:

        # Delete any lockfile and job_file that might be present from previous runs.
        if job_file.is_file():
            os.remove(job_file)

        if lock_file.is_file():
            os.remove(lock_file)

        try:
            with lock.acquire(timeout=1):
                logging.info('Making job list file')
                make_jobs_file(job_file, root_directory)
                logging.info(
                    'Job file created!. You can now run job_runner from multiple machines'
                )
                return

        except Timeout:
            print(
                f"Make sure lock file: {lock_file} is not present on running first instance"
            )
            sys.exit()

    config_name = config_path.name

    while True:

        try:
            with lock.acquire(timeout=60):

                # Create a lock then read jobs and add status to job file to ensure job is run once only.
                df_jobs = pd.read_csv(job_file, index_col=0)

                # Get an unfinished job
                jobs_to_do = df_jobs[df_jobs['status'] == 'to_run']

                if len(jobs_to_do) < 1:
                    logging.info("No more jobs left on jobs list")
                    break

                indx = jobs_to_do.index[0]

                vol = root_directory / (jobs_to_do.at[indx, 'job'])

                # Claim the job (status/start_time/host) and write it back
                # before releasing the lock so no other instance picks it up.
                df_jobs.at[indx, 'status'] = 'running'

                df_jobs.at[indx, 'start_time'] = datetime.now().strftime(
                    '%Y-%m-%d %H:%M:%S')

                df_jobs.at[indx, 'host'] = HN

                df_jobs.to_csv(job_file)

                # Make a project dir drectory for specimen
                # vol.parent should be the line name
                # vol.stem is the specimen name minus the extension
                spec_root_dir = root_directory / 'output' / vol.parent.name / vol.stem
                spec_input_dir = spec_root_dir / 'inputs'
                spec_input_dir.mkdir(exist_ok=True, parents=True)
                spec_out_dir = spec_root_dir / 'output'
                spec_out_dir.mkdir(exist_ok=True, parents=True)
                shutil.copy(vol, spec_input_dir)

                # Copy the config into the project directory
                dest_config_path = spec_root_dir / config_name

                if dest_config_path.is_file():
                    os.remove(dest_config_path)

                shutil.copy(config_path, dest_config_path)

                # rename the target_folder now we've moved the config
                c = cfg_load(dest_config_path)

                target_folder = config_path.parent / c.get('target_folder')
                # Can't seem to get this to work with pathlib
                target_folder_relpath = os.path.relpath(
                    target_folder, str(dest_config_path.parent))
                c['target_folder'] = target_folder_relpath

                with open(dest_config_path, 'w') as fh:
                    fh.write(toml.dumps(c))

        except Timeout:
            sys.exit('Timed out' + socket.gethostname())

        # Default so the finally block always has a status to record, even
        # when an exception (e.g. KeyboardInterrupt) bypasses the handlers
        # below. Previously `status` could be unbound/stale in `finally`.
        status = 'failed'

        try:
            print(f'debug {HN}, {linenum()}')
            print(f'trying {vol.name}')
            run_lama.run(dest_config_path)

        except LamaConfigError as lce:
            status = 'config_error'
            logging.exception(f'There is a problem with the config\n{lce}')
            sys.exit()

        except KeyboardInterrupt:
            # Fix: KeyboardInterrupt derives from BaseException in Python 3,
            # so the old `except Exception` + class-name check never ran.
            # Catch it explicitly; the finally block records 'failed' so the
            # job can be re-run, then we exit.
            logging.info('terminating')
            sys.exit('Exiting')

        except Exception as e:
            status = 'failed'
            logging.exception(e)

        else:
            status = 'complete'

        finally:
            # Re-acquire the lock to record the outcome for this specimen.
            with lock:
                df_jobs = pd.read_csv(job_file, index_col=0)
                df_jobs.at[indx, 'status'] = status
                df_jobs.at[indx, 'end_time'] = datetime.now().strftime(
                    '%Y-%m-%d %H:%M:%S')
                df_jobs.to_csv(job_file)
    print('Exiting job_runner')
    return True
def test_pairwie_average():
    """Run lama on the pairwise population-average test config."""
    cfg_path = Path(population_test_dir) / 'pairwise_population_average_config.toml'
    run_lama.run(cfg_path)