Example #1
0
    def predict_in_block(block):
        """Run prediction for one daisy block inside an nvidia-docker container.

        Args:
            block: daisy block providing ``write_roi`` (used to derive a
                unique container/log name).

        Side effects: launches a docker container via ``daisy.call``; the
        prediction script's output is written to a per-block log file in
        ``cwd``.
        """
        from distributed import get_worker

        write_roi = block.write_roi
        predict_script = '/groups/saalfeld/home/hanslovskyp/experiments/quasi-isotropic/predict/predict.py'
        # GPU assignment is stored on the dask worker by the scheduler setup.
        cuda_visible_devices = get_worker().cuda_visible_devices
        # FIX: was named `predict_scripts_args` while the format call below
        # read `predict_script_args` -- a NameError at runtime.
        predict_script_args = ''

        name = 'predict-%s-%s' % (write_roi.get_begin(), write_roi.get_size())
        log_file = os.path.join(cwd, '%s.log' % name)
        pythonpath = ':'.join([
            '%s/workspace-pycharm/u-net/gunpowder' % _HOME,
            '%s/workspace-pycharm/u-net/CNNectome' % _HOME,
            '/groups/saalfeld/home/papec/Work/my_projects/z5/bld/python'
        ])
        pythonpath_export_str = 'export PYTHONPATH=%s:$PYTHONPATH' % pythonpath

        daisy.call([
            'nvidia-docker', 'run', '--rm', '-u',
            # FIX: os.getuid() returns an int; command arguments must be str.
            str(os.getuid()), '-v', '/groups/turaga:/groups/turaga:rshared', '-v',
            '/groups/saalfeld:/groups/saalfeld:rshared', '-v',
            '/nrs/saalfeld:/nrs/saalfeld:rshared', '-w', cwd, '--name', name,
            # FIX: a comma was missing after the image name, which silently
            # concatenated it with '/bin/bash' into one bogus argument.
            'neptunes5thmoon/gunpowder:v0.3-pre6-dask1',
            '/bin/bash', '-c',
            # FIX: `2>&1 > file` sends stderr to the terminal, not the file;
            # `> file 2>&1` captures both streams in the log.
            '"export CUDA_VISIBLE_DEVICES=%s; %s; python -u %s %s > %s 2>&1"' %
            (cuda_visible_devices, pythonpath_export_str, predict_script,
             predict_script_args, log_file)
        ])
Example #2
0
def start_worker(
    predict_config,
    worker_config,
    data_config,
    graph_config,
    solve_config,
    queue,
    singularity_container,
    mount_dirs,
    solve_block,
    solve_setup_dir,
):
    """Launch one solve worker via daisy.

    Builds the `python -u <solve_block> <configs...>` command, wraps it
    for the cluster queue and/or singularity as requested (the literal
    string "None" disables either), and runs it through ``daisy.call``
    with per-worker log files under ``solve_setup_dir``.
    """
    worker_id = daisy.Context.from_env().worker_id

    # Per-worker log files, named after the daisy worker id.
    log_out = os.path.join(solve_setup_dir, "{}_worker.out".format(worker_id))
    log_err = os.path.join(solve_setup_dir, "{}_worker.err".format(worker_id))

    # The solve script receives all five config files as positional args.
    script_args = (
        solve_block,
        predict_config,
        worker_config,
        data_config,
        graph_config,
        solve_config,
    )
    base_command = "python -u " + " ".join(str(a) for a in script_args)

    if queue != "None":
        # Submit to the cluster, wrapped in the given container.
        logger.info("Running block on queue {} and container {}".format(
            queue, singularity_container))
        cmd = run(
            command=base_command,
            queue=queue,
            num_gpus=0,
            num_cpus=1,
            singularity_image=singularity_container,
            mount_dirs=mount_dirs,
            execute=False,
            expand=False,
            batch=True,
        )
    elif singularity_container != "None":
        # No queue: run locally, but still inside the container.
        logger.warning("Running block **locally**, no queue provided.")
        cmd = run_singularity(
            base_command,
            singularity_container,
            mount_dirs=mount_dirs,
            execute=False,
            expand=False,
        )
    else:
        # No queue and no container: run in the current environment.
        logger.warning("Running block **locally**, no queue provided.")
        logger.warning(
            "Running block in current environment, no singularity image provided."
        )
        cmd = [base_command]

    daisy.call(cmd, log_out=log_out, log_err=log_err)

    logger.info("Solve worker finished")
def start_worker(affs_file, affs_dataset, fragments_file, fragments_dataset,
                 db_host, db_name, context, fragments_in_xy, queue,
                 network_dir, epsilon_agglomerate, mask_file, mask_dataset,
                 filter_fragments, replace_sections, num_voxels_in_block,
                 **kwargs):
    """Submit one extract-fragments worker to LSF.

    Serializes the block configuration to a JSON file (named by a hash of
    its values) and submits `workers/extract_fragments_worker.py` with
    that config file via `bsub`, executed through ``daisy.call``.
    """
    worker_id = daisy.Context.from_env().worker_id

    logging.info(f"worker {worker_id} started...")

    # Per-network output directory for configs and logs.
    output_dir = os.path.join('.extract_fragments_blockwise', network_dir)
    os.makedirs(output_dir, exist_ok=True)

    log_out = os.path.join(output_dir,
                           'extract_fragments_blockwise_%d.out' % worker_id)
    log_err = os.path.join(output_dir,
                           'extract_fragments_blockwise_%d.err' % worker_id)

    # Everything the worker script needs, serialized to JSON below.
    config = dict(
        affs_file=affs_file,
        affs_dataset=affs_dataset,
        fragments_file=fragments_file,
        fragments_dataset=fragments_dataset,
        db_host=db_host,
        db_name=db_name,
        context=context,
        fragments_in_xy=fragments_in_xy,
        queue=queue,
        epsilon_agglomerate=epsilon_agglomerate,
        mask_file=mask_file,
        mask_dataset=mask_dataset,
        filter_fragments=filter_fragments,
        replace_sections=replace_sections,
        num_voxels_in_block=num_voxels_in_block,
    )

    # Name the config file by a hash of its values, so identical
    # configurations map to the same file.
    config_str = ''.join(str(v) for v in config.values())
    config_hash = abs(int(hashlib.md5(config_str.encode()).hexdigest(), 16))
    config_file = os.path.join(output_dir, '%d.config' % config_hash)

    with open(config_file, 'w') as f:
        json.dump(config, f)

    logging.info('Running block with config %s...' % config_file)

    # Worker script, addressed relative to the current directory.
    worker_script = os.path.join('.', 'workers/extract_fragments_worker.py')

    # LSF submission; the worker's stdout is also redirected to log_out.
    bsub_command = [
        'bsub', '-n', '1', '-o', log_out,
        f'python {worker_script} {config_file} > {log_out}'
    ]

    logging.info(f'Base command: {bsub_command}')

    daisy.call(bsub_command, log_out=log_out, log_err=log_err)
Example #4
0
def predict_worker(config_file, iteration):
    """Submit one prediction job for the given training iteration.

    Defaults below are overridden by the 'general' and 'predict' sections
    of the master config; the resulting command is built with ``run`` and
    executed via ``daisy.call`` with per-worker, timestamped log files.
    """
    master_config = load_config(config_file)
    # Later dicts win: the master config overrides these defaults.
    config = {
        **{
            "singularity_image": 'linajea/linajea:v1.1',
            "queue": 'slowpoke',
            'setups_dir': '../02_setups'
        },
        **master_config['general'],
        **master_config['predict'],
    }
    singularity_image = config['singularity_image']
    queue = config['queue']
    setups_dir = config['setups_dir']
    setup = config['setup']
    chargeback = config['lab']

    worker_id = daisy.Context.from_env().worker_id
    worker_time = time.time()

    if singularity_image is None:
        image = None
    else:
        # Images live in a shared directory, addressed by name.
        image = '/nrs/funke/singularity/' + singularity_image + '.img'
        logger.debug("Using singularity image %s" % image)

    cmd = run(command='python -u %s --config %s --iteration %d' %
              (os.path.join(setups_dir, 'predict.py'), config_file, iteration),
              queue=queue,
              num_gpus=1,
              num_cpus=5,
              singularity_image=image,
              mount_dirs=['/groups', '/nrs'],
              execute=False,
              expand=False,
              flags=['-P ' + chargeback])
    logger.info("Starting predict worker...")
    logger.info("Command: %s" % str(cmd))
    daisy.call(
        cmd,
        log_out='logs/predict_%s_%d_%d.out' % (setup, worker_time, worker_id),
        log_err='logs/predict_%s_%d_%d.err' % (setup, worker_time, worker_id))

    logger.info("Predict worker finished")
Example #5
0
def predict_worker(train_setup_dir, predict_setup_dir, predict_number,
                   train_number, experiment, iteration, in_container,
                   in_dataset, out_container, db_host, db_name, queue,
                   singularity_container, num_cpus, num_cache_workers,
                   mount_dirs):
    """Run one prediction block worker.

    Writes a per-worker JSON instruction file for ``predict_block.py``,
    wraps the invocation for the cluster queue and/or singularity (the
    literal string "None" disables either), and executes it through
    ``daisy.call`` with per-worker log files under
    ``predict_setup_dir/worker_files``.
    """
    predict_block = os.path.join(predict_setup_dir, 'predict_block.py')

    # Resources/containerization the worker should request.
    run_instruction = {
        'queue': queue,
        'num_cpus': num_cpus,
        'num_cache_workers': num_cache_workers,
        'singularity': singularity_container
    }

    # Everything predict_block.py needs, serialized to JSON below.
    worker_instruction = {
        'train_setup_dir': train_setup_dir,
        'iteration': iteration,
        'in_container': in_container,
        'in_dataset': in_dataset,
        'out_container': out_container,
        'db_host': db_host,
        'db_name': db_name,
        'run_instruction': run_instruction
    }

    worker_id = daisy.Context.from_env().worker_id
    worker_dir = os.path.join(predict_setup_dir, "worker_files")
    # FIX: was a bare `try/except: pass`, which swallowed *every* error
    # (e.g. permission problems), not just "directory already exists".
    os.makedirs(worker_dir, exist_ok=True)

    worker_instruction_file = os.path.join(
        worker_dir, '{}_worker_instruction.json'.format(worker_id))
    log_out = os.path.join(worker_dir, '{}_worker.out'.format(worker_id))
    log_err = os.path.join(worker_dir, '{}_worker.err'.format(worker_id))

    with open(worker_instruction_file, 'w') as f:
        json.dump(worker_instruction, f)

    logger.info(
        'Running block for prediction (e:{}, t:{}, i:{}, p:{}) and worker instruction {}...'
        .format(experiment, train_number, iteration, predict_number,
                worker_id))

    base_command = "python -u {} {}".format(predict_block,
                                            worker_instruction_file)
    if queue == "None":
        logger.warning("Running block **locally**, no queue provided.")
        if singularity_container == "None":
            # Run directly in the current environment.
            logger.warning(
                "Running block in current environment, no singularity image provided."
            )
            cmd = base_command
        else:
            # Run locally, but inside the container.
            cmd = run_singularity(base_command,
                                  singularity_container,
                                  mount_dirs=mount_dirs,
                                  execute=False,
                                  expand=False)
    else:
        # Submit to the cluster (one GPU), wrapped in the container.
        logger.info("Running block on queue {} and container {}".format(
            queue, singularity_container))
        cmd = run(command=base_command,
                  queue=queue,
                  num_gpus=1,
                  num_cpus=num_cpus,
                  singularity_image=singularity_container,
                  mount_dirs=mount_dirs,
                  execute=False,
                  expand=False)

    daisy.call(cmd, log_out=log_out, log_err=log_err)

    logger.info('Predict worker finished')
Example #6
0
def predict_worker(experiment, setup, network_dir, iteration, raw_file,
                   raw_dataset, auto_file, auto_dataset, out_file, out_dataset,
                   db_host, db_name, queue, singularity_image):
    """Submit one blockwise-prediction job to LSF.

    Serializes the prediction configuration to a JSON file (named by a
    hash of its values) and submits the setup's `predict.py` with that
    config via `bsub` (one GPU), optionally inside a singularity image.
    """
    # Prediction script shipped with the training setup.
    setup_dir = os.path.join('..', experiment, '02_train', setup)
    predict_script = os.path.abspath(os.path.join(setup_dir, 'predict.py'))

    # A .json raw_file is an indirection: it names the actual container.
    if raw_file.endswith('.json'):
        with open(raw_file, 'r') as f:
            raw_file = json.load(f)['container']

    worker_config = {'queue': queue, 'num_cpus': 5, 'num_cache_workers': 5}

    # Everything predict.py needs, serialized to JSON below.
    config = {
        'iteration': iteration,
        'raw_file': raw_file,
        'raw_dataset': raw_dataset,
        'auto_file': auto_file,
        'auto_dataset': auto_dataset,
        'out_file': out_file,
        'out_dataset': out_dataset,
        'db_host': db_host,
        'db_name': db_name,
        'worker_config': worker_config
    }

    # A hash of the config values gives this configuration a stable name.
    config_hash = abs(int(
        hashlib.md5(
            ''.join(str(v) for v in config.values()).encode()
        ).hexdigest(), 16))

    worker_id = daisy.Context.from_env().worker_id

    output_dir = os.path.join('.predict_blockwise', network_dir)
    os.makedirs(output_dir, exist_ok=True)

    # Config and log files live next to each other in output_dir.
    config_file = os.path.join(output_dir, '%d.config' % config_hash)
    log_out = os.path.join(output_dir, 'predict_blockwise_%d.out' % worker_id)
    log_err = os.path.join(output_dir, 'predict_blockwise_%d.err' % worker_id)

    with open(config_file, 'w') as f:
        json.dump(config, f)

    logging.info('Running block with config %s...' % config_file)

    # LSF submission with one GPU; stdout is mirrored to log_out.
    command = [
        'bsub', '-n', str(worker_config['num_cpus']),
        '-o', log_out, '-gpu', 'num=1',
        '-q', worker_config['queue']
    ]

    if singularity_image is not None:
        command += [
            'singularity exec', '-B', '/groups', '--nv', singularity_image
        ]

    command += ['python -u %s %s' % (predict_script, config_file)]

    logging.info(f'Worker command: {command}')

    daisy.call(command, log_out=log_out, log_err=log_err)

    logging.info('Predict worker finished')