def predict_in_block(block):
    """Run the prediction script for one daisy block inside nvidia-docker.

    Args:
        block: daisy block providing ``read_roi`` / ``write_roi``.

    Side effects:
        Launches a docker container via ``daisy.call`` and writes a
        per-block log file into the module-level ``cwd`` directory.
    """
    from distributed import get_worker

    read_roi = block.read_roi  # NOTE(review): unused here; kept for symmetry with write_roi
    write_roi = block.write_roi

    predict_script = '/groups/saalfeld/home/hanslovskyp/experiments/quasi-isotropic/predict/predict.py'
    # GPU assignment was attached to the dask worker by the scheduler setup.
    cuda_visible_devices = get_worker().cuda_visible_devices

    # FIX: was misspelled `predict_scripts_args` while the command string
    # below referenced `predict_script_args`, raising NameError at call time.
    predict_script_args = ''

    name = 'predict-%s-%s' % (write_roi.get_begin(), write_roi.get_size())
    log_file = os.path.join(cwd, '%s.log' % name)

    pythonpath = ':'.join([
        '%s/workspace-pycharm/u-net/gunpowder' % _HOME,
        '%s/workspace-pycharm/u-net/CNNectome' % _HOME,
        '/groups/saalfeld/home/papec/Work/my_projects/z5/bld/python'
    ])
    pythonpath_export_str = 'export PYTHONPATH=%s:$PYTHONPATH' % pythonpath

    daisy.call([
        'nvidia-docker', 'run',
        '--rm',
        # FIX: docker arguments must be strings; os.getuid() returns int.
        '-u', str(os.getuid()),
        '-v', '/groups/turaga:/groups/turaga:rshared',
        '-v', '/groups/saalfeld:/groups/saalfeld:rshared',
        '-v', '/nrs/saalfeld:/nrs/saalfeld:rshared',
        '-w', cwd,
        '--name', name,
        # FIX: a missing comma here used to concatenate the image name with
        # '/bin/bash' into one bogus argument.
        'neptunes5thmoon/gunpowder:v0.3-pre6-dask1',
        '/bin/bash', '-c',
        # FIX: '2>&1 > file' sends stderr to the *old* stdout, so errors were
        # lost; '> file 2>&1' captures both streams in the log.
        '"export CUDA_VISIBLE_DEVICES=%s; %s; python -u %s %s > %s 2>&1"' % (
            cuda_visible_devices,
            pythonpath_export_str,
            predict_script,
            predict_script_args,
            log_file)
    ])
def start_worker(
        predict_config,
        worker_config,
        data_config,
        graph_config,
        solve_config,
        queue,
        singularity_container,
        mount_dirs,
        solve_block,
        solve_setup_dir,
):
    """Launch one solve worker through daisy.

    Chooses among three launch modes based on ``queue`` and
    ``singularity_container`` (both use the string "None" as the sentinel
    for "not provided"): plain local execution, local execution inside a
    singularity container, or cluster submission via ``run``. Per-worker
    stdout/stderr are written into ``solve_setup_dir``.
    """
    wid = daisy.Context.from_env().worker_id

    # Per-worker log files live next to the solve setup.
    out_path = os.path.join(solve_setup_dir, "{}_worker.out".format(wid))
    err_path = os.path.join(solve_setup_dir, "{}_worker.err".format(wid))

    solve_command = "python -u {} {} {} {} {} {}".format(
        solve_block,
        predict_config,
        worker_config,
        data_config,
        graph_config,
        solve_config,
    )

    if queue != "None":
        # Cluster submission.
        logger.info("Running block on queue {} and container {}".format(
            queue, singularity_container))
        cmd = run(
            command=solve_command,
            queue=queue,
            num_gpus=0,
            num_cpus=1,
            singularity_image=singularity_container,
            mount_dirs=mount_dirs,
            execute=False,
            expand=False,
            batch=True,
        )
    else:
        logger.warning("Running block **locally**, no queue provided.")
        if singularity_container != "None":
            # Local, but containerized.
            cmd = run_singularity(
                solve_command,
                singularity_container,
                mount_dirs=mount_dirs,
                execute=False,
                expand=False,
            )
        else:
            logger.warning(
                "Running block in current environment, no singularity image provided."
            )
            cmd = [solve_command]

    daisy.call(cmd, log_out=out_path, log_err=err_path)
    logger.info("Solve worker finished")
def start_worker(affs_file, affs_dataset, fragments_file, fragments_dataset,
                 db_host, db_name, context, fragments_in_xy, queue,
                 network_dir, epsilon_agglomerate, mask_file, mask_dataset,
                 filter_fragments, replace_sections, num_voxels_in_block,
                 **kwargs):
    """Submit one fragment-extraction worker via bsub.

    Serializes the block configuration to a hash-named JSON file inside
    ``.extract_fragments_blockwise/<network_dir>`` and then dispatches
    ``workers/extract_fragments_worker.py`` through ``daisy.call``,
    capturing per-worker stdout/stderr.
    """
    worker_id = daisy.Context.from_env().worker_id
    logging.info(f"worker {worker_id} started...")

    out_dir = os.path.join('.extract_fragments_blockwise', network_dir)
    os.makedirs(out_dir, exist_ok=True)

    log_out = os.path.join(
        out_dir, 'extract_fragments_blockwise_%d.out' % worker_id)
    log_err = os.path.join(
        out_dir, 'extract_fragments_blockwise_%d.err' % worker_id)

    config = {
        'affs_file': affs_file,
        'affs_dataset': affs_dataset,
        'fragments_file': fragments_file,
        'fragments_dataset': fragments_dataset,
        'db_host': db_host,
        'db_name': db_name,
        'context': context,
        'fragments_in_xy': fragments_in_xy,
        'queue': queue,
        'epsilon_agglomerate': epsilon_agglomerate,
        'mask_file': mask_file,
        'mask_dataset': mask_dataset,
        'filter_fragments': filter_fragments,
        'replace_sections': replace_sections,
        'num_voxels_in_block': num_voxels_in_block
    }

    # Derive a stable filename from the config contents so identical
    # configurations map to the same file.
    fingerprint = ''.join('%s' % (v, ) for v in config.values())
    config_hash = abs(int(hashlib.md5(fingerprint.encode()).hexdigest(), 16))
    config_file = os.path.join(out_dir, '%d.config' % config_hash)

    with open(config_file, 'w') as f:
        json.dump(config, f)

    logging.info('Running block with config %s...' % config_file)

    worker_script = os.path.join('.', 'workers/extract_fragments_worker.py')

    submit_cmd = [
        'bsub',
        '-n', '1',
        '-o', f'{log_out}',
        f'python {worker_script} {config_file} > {log_out}'
    ]

    logging.info(f'Base command: {submit_cmd}')

    daisy.call(submit_cmd, log_out=log_out, log_err=log_err)
def predict_worker(config_file, iteration):
    """Submit a prediction job for one block to the cluster.

    Merges built-in defaults with the 'general' and 'predict' sections of
    the master config, resolves the singularity image path, and dispatches
    ``predict.py`` from the configured setups directory via ``daisy.call``.

    Args:
        config_file: path to the master configuration file.
        iteration: training iteration whose checkpoint should be used.
    """
    # Defaults, overridden by the master config sections below.
    settings = {
        "singularity_image": 'linajea/linajea:v1.1',
        "queue": 'slowpoke',
        'setups_dir': '../02_setups'
    }
    master = load_config(config_file)
    settings.update(master['general'])
    settings.update(master['predict'])

    singularity_image = settings['singularity_image']
    queue = settings['queue']
    setups_dir = settings['setups_dir']
    setup = settings['setup']
    chargeback = settings['lab']  # billing account for the -P flag

    worker_id = daisy.Context.from_env().worker_id
    worker_time = time.time()  # timestamp makes the log filenames unique

    if singularity_image is None:
        image = None
    else:
        image = '/nrs/funke/singularity/' + singularity_image + '.img'
        logger.debug("Using singularity image %s" % image)

    cmd = run(
        command='python -u %s --config %s --iteration %d' % (
            os.path.join(setups_dir, 'predict.py'), config_file, iteration),
        queue=queue,
        num_gpus=1,
        num_cpus=5,
        singularity_image=image,
        mount_dirs=['/groups', '/nrs'],
        execute=False,
        expand=False,
        flags=['-P ' + chargeback])

    logger.info("Starting predict worker...")
    logger.info("Command: %s" % str(cmd))

    daisy.call(
        cmd,
        log_out='logs/predict_%s_%d_%d.out' % (setup, worker_time, worker_id),
        log_err='logs/predict_%s_%d_%d.err' % (setup, worker_time, worker_id))

    logger.info("Predict worker finished")
def predict_worker(train_setup_dir, predict_setup_dir, predict_number,
                   train_number, experiment, iteration, in_container,
                   in_dataset, out_container, db_host, db_name, queue,
                   singularity_container, num_cpus, num_cache_workers,
                   mount_dirs):
    """Launch one prediction worker for `predict_block.py`.

    Writes a per-worker JSON instruction file into
    ``<predict_setup_dir>/worker_files``, then executes the block either
    locally, inside a singularity container, or as a cluster job — chosen
    by the string sentinels ``queue == "None"`` /
    ``singularity_container == "None"``. Per-worker stdout/stderr are
    captured next to the instruction file.
    """
    predict_block = os.path.join(predict_setup_dir, 'predict_block.py')

    run_instruction = {
        'queue': queue,
        'num_cpus': num_cpus,
        'num_cache_workers': num_cache_workers,
        'singularity': singularity_container
    }

    worker_instruction = {
        'train_setup_dir': train_setup_dir,
        'iteration': iteration,
        'in_container': in_container,
        'in_dataset': in_dataset,
        'out_container': out_container,
        'db_host': db_host,
        'db_name': db_name,
        'run_instruction': run_instruction
    }

    worker_id = daisy.Context.from_env().worker_id
    worker_dir = os.path.join(predict_setup_dir, "worker_files")

    # FIX: was `try: os.makedirs(...) except: pass` — the bare except
    # swallowed *every* error (permissions, bad path), not just "already
    # exists". exist_ok=True keeps the intended tolerance without masking
    # real failures.
    os.makedirs(worker_dir, exist_ok=True)

    worker_instruction_file = os.path.join(
        worker_dir, '{}_worker_instruction.json'.format(worker_id))
    log_out = os.path.join(worker_dir, '{}_worker.out'.format(worker_id))
    log_err = os.path.join(worker_dir, '{}_worker.err'.format(worker_id))

    with open(worker_instruction_file, 'w') as f:
        json.dump(worker_instruction, f)

    logger.info(
        'Running block for prediction (e:{}, t:{}, i:{}, p:{}) and worker instruction {}...'
        .format(experiment, train_number, iteration, predict_number,
                worker_id))

    base_command = "python -u {} {}".format(predict_block,
                                            worker_instruction_file)

    if queue == "None":
        logger.warning("Running block **locally**, no queue provided.")
        if singularity_container == "None":
            logger.warning(
                "Running block in current environment, no singularity image provided."
            )
            cmd = base_command
        else:
            cmd = run_singularity(base_command,
                                  singularity_container,
                                  mount_dirs=mount_dirs,
                                  execute=False,
                                  expand=False)
    else:
        logger.info("Running block on queue {} and container {}".format(
            queue, singularity_container))
        cmd = run(command=base_command,
                  queue=queue,
                  num_gpus=1,
                  num_cpus=num_cpus,
                  singularity_image=singularity_container,
                  mount_dirs=mount_dirs,
                  execute=False,
                  expand=False)

    daisy.call(cmd, log_out=log_out, log_err=log_err)

    logger.info('Predict worker finished')
def predict_worker(experiment, setup, network_dir, iteration, raw_file,
                   raw_dataset, auto_file, auto_dataset, out_file,
                   out_dataset, db_host, db_name, queue, singularity_image):
    """Submit one blockwise-prediction worker to the cluster via bsub.

    Resolves the per-setup ``predict.py``, serializes the block
    configuration to a hash-named JSON file under
    ``.predict_blockwise/<network_dir>``, and dispatches the job (GPU,
    optionally wrapped in singularity) through ``daisy.call``.
    """
    # Locate the setup-specific worker script to distribute.
    setup_dir = os.path.join('..', experiment, '02_train', setup)
    predict_script = os.path.abspath(os.path.join(setup_dir, 'predict.py'))

    # A .json raw_file is an indirection: it names the actual container.
    if raw_file.endswith('.json'):
        with open(raw_file, 'r') as f:
            spec = json.load(f)
        raw_file = spec['container']

    worker_config = {'queue': queue, 'num_cpus': 5, 'num_cache_workers': 5}

    config = {
        'iteration': iteration,
        'raw_file': raw_file,
        'raw_dataset': raw_dataset,
        'auto_file': auto_file,
        'auto_dataset': auto_dataset,
        'out_file': out_file,
        'out_dataset': out_dataset,
        'db_host': db_host,
        'db_name': db_name,
        'worker_config': worker_config
    }

    # Stable filename derived from the configuration contents.
    fingerprint = ''.join('%s' % (v, ) for v in config.values())
    config_hash = abs(int(hashlib.md5(fingerprint.encode()).hexdigest(), 16))

    worker_id = daisy.Context.from_env().worker_id

    output_dir = os.path.join('.predict_blockwise', network_dir)
    os.makedirs(output_dir, exist_ok=True)

    config_file = os.path.join(output_dir, '%d.config' % config_hash)
    log_out = os.path.join(output_dir, 'predict_blockwise_%d.out' % worker_id)
    log_err = os.path.join(output_dir, 'predict_blockwise_%d.err' % worker_id)

    with open(config_file, 'w') as f:
        json.dump(config, f)

    logging.info('Running block with config %s...' % config_file)

    # Assemble the bsub submission; daisy.call joins the pieces.
    submit = [
        'bsub',
        '-n', str(worker_config['num_cpus']),
        '-o', f'{log_out}',
        '-gpu', 'num=1',
        '-q', worker_config['queue']
    ]
    if singularity_image is not None:
        submit += [
            'singularity exec', '-B', '/groups', '--nv', singularity_image
        ]
    submit += ['python -u %s %s' % (predict_script, config_file)]

    logging.info(f'Worker command: {submit}')

    daisy.call(submit, log_out=log_out, log_err=log_err)

    logging.info('Predict worker finished')