def test_host(self):
    """Submit a probe script pinned to one specific cluster host.

    Exercises the `host=` routing of `run` by requesting node c04u21
    on the slowpoke queue.
    """
    run(
        'python3.6 ./funlib/tests/am_i_on_my_host.py',
        num_cpus=1,
        num_gpus=0,
        memory=25600,
        working_dir=self.working_dir,
        singularity_image=self.singularity_image,
        host="c04u21",
        queue="slowpoke",
        execute=True,
    )
def test_submit(self):
    """Submit a container-detection probe with CPU and GPU resources.

    No explicit host is given (empty string), so the scheduler picks
    the node; the probe itself reports whether it runs in a container.
    """
    run(
        'python3.6 ./funlib/tests/am_i_in_a_container.py',
        num_cpus=5,
        num_gpus=1,
        memory=25600,
        working_dir=self.working_dir,
        singularity_image=self.singularity_image,
        host="",
        queue="slowpoke",
        execute=True,
    )
def test_mount(self):
    """Submit a probe that checks bind mounts inside the container.

    Requests /nrs and /tmp to be mounted and lets the probe script
    verify they are visible from within the singularity image.
    """
    run(
        'python3.6 ./funlib/tests/am_i_mounted.py',
        num_cpus=1,
        num_gpus=0,
        memory=25600,
        working_dir=self.working_dir,
        singularity_image=self.singularity_image,
        host="",
        queue="slowpoke",
        mount_dirs=["/nrs", "/tmp"],
        execute=True,
    )
def start_worker(
    predict_config,
    worker_config,
    data_config,
    graph_config,
    solve_config,
    queue,
    singularity_container,
    mount_dirs,
    solve_block,
    solve_setup_dir,
):
    """Run one solve block as a daisy worker, locally or via a queue.

    The literal string "None" (not the None object) marks an absent
    queue or container — this matches the convention used by the other
    workers in this file. Worker stdout/stderr are captured into
    per-worker files under `solve_setup_dir`.
    """
    worker_id = daisy.Context.from_env().worker_id
    log_out = os.path.join(solve_setup_dir, "{}_worker.out".format(worker_id))
    log_err = os.path.join(solve_setup_dir, "{}_worker.err".format(worker_id))

    block_command = "python -u {} {} {} {} {} {}".format(
        solve_block,
        predict_config,
        worker_config,
        data_config,
        graph_config,
        solve_config,
    )

    if queue != "None":
        # Submit to the cluster; `run` only builds the command here
        # (execute=False), daisy.call actually launches it below.
        logger.info("Running block on queue {} and container {}".format(
            queue, singularity_container))
        cmd = run(
            command=block_command,
            queue=queue,
            num_gpus=0,
            num_cpus=1,
            singularity_image=singularity_container,
            mount_dirs=mount_dirs,
            execute=False,
            expand=False,
            batch=True,
        )
    else:
        logger.warning("Running block **locally**, no queue provided.")
        if singularity_container != "None":
            cmd = run_singularity(
                block_command,
                singularity_container,
                mount_dirs=mount_dirs,
                execute=False,
                expand=False,
            )
        else:
            logger.warning(
                "Running block in current environment, no singularity image provided."
            )
            cmd = [block_command]

    daisy.call(cmd, log_out=log_out, log_err=log_err)
    logger.info("Solve worker finished")
def submit_jobs(db_credentials, db_name, collection_name, predict_id,
                dataset, model, n_gpus, n_cpus, batch_size, prefetch_factor,
                queue, singularity_container=None, mount_dirs=None):
    """Launch one prediction job per GPU, optionally through a queue.

    Args:
        db_credentials/db_name/collection_name/predict_id: forwarded to
            BrainDb and to the job script's CLI flags.
        dataset: provides `name` and `voxel_size` (indexed z, y, x).
        model: provides `input_shape` (indexed z, y, x).
        n_gpus: number of jobs to spawn (one per gpu_id).
        n_cpus, batch_size, prefetch_factor: forwarded to the job script.
        queue: cluster queue name, or None to run the command directly.
        singularity_container: image passed to `run` when a queue is used.
        mount_dirs: bind mounts for the container; defaults to
            ["/nrs", "/scratch", "/groups", "/misc"].

    Side effects: prints each command and starts it with Popen(shell=True);
    does not wait for completion.
    """
    # Fix: the default used to be a mutable list literal shared across
    # calls; use None as the sentinel and build a fresh list per call.
    if mount_dirs is None:
        mount_dirs = ["/nrs", "/scratch", "/groups", "/misc"]

    brain_db = BrainDb(db_credentials, db_name, collection_name, predict_id)
    #brain_db.initialize()

    # NOTE(review): dx/dy/dz are computed but not used below — kept so a
    # missing `input_shape`/`voxel_size` still fails as before; confirm
    # whether they can be removed.
    dx = model.input_shape[2] * dataset.voxel_size[2]
    dy = model.input_shape[1] * dataset.voxel_size[1]
    dz = model.input_shape[0] * dataset.voxel_size[0]

    job_script = os.path.join(os.path.dirname(__file__), "job.py")
    dataset_name = dataset.name

    for gpu_id in range(n_gpus):
        base_cmd = f"python -u {job_script} --creds {db_credentials} --db {db_name} "+\
                   f"--coll {collection_name} --id {predict_id} "+\
                   f"--dat {dataset_name} --gpus {n_gpus} " +\
                   f"--cpus {n_cpus} --bsize {batch_size} --prefetch {prefetch_factor} --gpuid {gpu_id}"

        if queue is not None:
            # `run` with execute=False returns the expanded submission
            # command as a string; it is launched via Popen below.
            cmd = run(command=base_cmd,
                      queue=queue,
                      num_gpus=1,
                      num_cpus=n_cpus,
                      singularity_image=singularity_container,
                      mount_dirs=mount_dirs,
                      execute=False,
                      expand=True)
        else:
            cmd = base_cmd

        # Collapse repeated whitespace; str.join replaces the old
        # quadratic `+=` concatenation loop.
        cmd_string = " ".join(tok for tok in cmd.split(" ") if tok != '')
        print(cmd_string)
        Popen(cmd_string, shell=True)
def predict_worker(config_file, iteration):
    """Launch one cluster prediction job for the given config/iteration.

    Defaults (image, queue, setups dir) are overridden by the 'general'
    and 'predict' sections of the master config file. Blocks in
    daisy.call until the submitted job finishes.
    """
    settings = {
        "singularity_image": 'linajea/linajea:v1.1',
        "queue": 'slowpoke',
        'setups_dir': '../02_setups'
    }
    master_config = load_config(config_file)
    settings.update(master_config['general'])
    settings.update(master_config['predict'])

    singularity_image = settings['singularity_image']
    queue = settings['queue']
    setups_dir = settings['setups_dir']
    setup = settings['setup']
    # 'lab' is billed for the job via the -P flag below.
    chargeback = settings['lab']

    worker_id = daisy.Context.from_env().worker_id
    worker_time = time.time()

    if singularity_image is None:
        image = None
    else:
        image = '/nrs/funke/singularity/' + singularity_image + '.img'
        logger.debug("Using singularity image %s" % image)

    cmd = run(
        command='python -u %s --config %s --iteration %d' % (
            os.path.join(setups_dir, 'predict.py'),
            config_file,
            iteration),
        queue=queue,
        num_gpus=1,
        num_cpus=5,
        singularity_image=image,
        mount_dirs=['/groups', '/nrs'],
        execute=False,
        expand=False,
        flags=['-P ' + chargeback])

    logger.info("Starting predict worker...")
    logger.info("Command: %s" % str(cmd))

    daisy.call(
        cmd,
        log_out='logs/predict_%s_%d_%d.out' % (setup, worker_time, worker_id),
        log_err='logs/predict_%s_%d_%d.err' % (setup, worker_time, worker_id))

    logger.info("Predict worker finished")
def predict_worker(train_setup_dir, predict_setup_dir, predict_number,
                   train_number, experiment, iteration, in_container,
                   in_dataset, out_container, db_host, db_name, queue,
                   singularity_container, num_cpus, num_cache_workers,
                   mount_dirs):
    """Run one prediction block as a daisy worker.

    Serializes a per-worker instruction JSON under
    `<predict_setup_dir>/worker_files`, then executes predict_block.py
    on it — locally, in singularity, or on a queue. The literal string
    "None" marks an absent queue/container, as elsewhere in this file.
    stdout/stderr go to per-worker .out/.err files in the same dir.
    """
    predict_block = os.path.join(predict_setup_dir, 'predict_block.py')

    run_instruction = {
        'queue': queue,
        'num_cpus': num_cpus,
        'num_cache_workers': num_cache_workers,
        'singularity': singularity_container
    }

    worker_instruction = {
        'train_setup_dir': train_setup_dir,
        'iteration': iteration,
        'in_container': in_container,
        'in_dataset': in_dataset,
        'out_container': out_container,
        'db_host': db_host,
        'db_name': db_name,
        'run_instruction': run_instruction
    }

    worker_id = daisy.Context.from_env().worker_id
    worker_dir = os.path.join(predict_setup_dir, "worker_files")

    # Fix: replaced a bare `except: pass` around os.makedirs — it
    # silently swallowed *every* error (e.g. permissions), not just the
    # already-exists race it was meant to tolerate.
    os.makedirs(worker_dir, exist_ok=True)

    worker_instruction_file = os.path.join(
        worker_dir, '{}_worker_instruction.json'.format(worker_id))
    log_out = os.path.join(worker_dir, '{}_worker.out'.format(worker_id))
    log_err = os.path.join(worker_dir, '{}_worker.err'.format(worker_id))

    with open(worker_instruction_file, 'w') as f:
        json.dump(worker_instruction, f)

    logger.info(
        'Running block for prediction (e:{}, t:{}, i:{}, p:{}) and worker instruction {}...'
        .format(experiment, train_number, iteration, predict_number,
                worker_id))

    base_command = "python -u {} {}".format(predict_block,
                                            worker_instruction_file)

    if queue == "None":
        logger.warning("Running block **locally**, no queue provided.")
        if singularity_container == "None":
            logger.warning(
                "Running block in current environment, no singularity image provided."
            )
            cmd = base_command
        else:
            cmd = run_singularity(base_command,
                                  singularity_container,
                                  mount_dirs=mount_dirs,
                                  execute=False,
                                  expand=False)
    else:
        logger.info("Running block on queue {} and container {}".format(
            queue, singularity_container))
        cmd = run(command=base_command,
                  queue=queue,
                  num_gpus=1,
                  num_cpus=num_cpus,
                  singularity_image=singularity_container,
                  mount_dirs=mount_dirs,
                  execute=False,
                  expand=False)

    daisy.call(cmd, log_out=log_out, log_err=log_err)

    logger.info('Predict worker finished')
# Read worker/data/graph configuration and launch the graph-building
# step in the appropriate environment (plain, singularity, or queue).
worker_config = read_worker_config("worker_config.ini")
data_config = read_data_config("data_config.ini")
graph_config = read_graph_config("graph_config.ini")

base_cmd = "python {} {} {} {} {}".format(
    graph_config["build_graph"],
    os.path.abspath("predict_config.ini"),
    os.path.abspath("worker_config.ini"),
    os.path.abspath("data_config.ini"),
    os.path.abspath("graph_config.ini"))

if worker_config["singularity_container"] == "None":
    # No container requested: run directly in the current environment
    # (any queue setting is ignored in this case).
    check_call(base_cmd, shell=True)
elif worker_config["queue"] == "None":
    # Container but no queue: run locally inside singularity.
    run_singularity(base_cmd,
                    singularity_image=worker_config["singularity_container"],
                    mount_dirs=worker_config["mount_dirs"],
                    execute=True)
else:
    # Container and queue: submit as a batch job.
    run(base_cmd,
        singularity_image=worker_config["singularity_container"],
        mount_dirs=worker_config["mount_dirs"],
        queue=worker_config["queue"],
        num_cpus=worker_config["num_cpus"],
        num_gpus=0,
        batch=True,
        execute=True)