def start_distributed_task( job_type, task_index, evaluation_worker=False,
                            shared_memory_scratchpad=shared_memory_scratchpad, gpu_id=None):
    """Launch a single process of a distributed run (PS, training worker, or evaluator).

    :param job_type: role of this task (e.g. parameter server vs. worker)
    :param task_index: index of this task within its job
    :param evaluation_worker: if True, this task evaluates instead of training
    :param shared_memory_scratchpad: scratchpad shared between processes
        (default captured from the enclosing scope at definition time)
    :param gpu_id: if given, pin this process to that GPU before starting
    :return: the started multiprocessing.Process
    """
    # pin the process to the requested GPU, if any
    if gpu_id is not None:
        set_gpu(gpu_id)

    dist_params = DistributedTaskParameters(
        framework_type=args.framework,
        parameters_server_hosts=ps_hosts,
        worker_hosts=worker_hosts,
        job_type=job_type,
        task_index=task_index,
        # 0 value for evaluation worker as it should run infinitely
        evaluate_only=0 if evaluation_worker else None,
        use_cpu=args.use_cpu,
        num_tasks=total_tasks,  # training tasks + 1 evaluation task
        num_training_tasks=args.num_workers,
        experiment_path=args.experiment_path,
        shared_memory_scratchpad=shared_memory_scratchpad,
        # each worker gets a different seed
        seed=args.seed + task_index if args.seed is not None else None,
        checkpoint_save_secs=args.checkpoint_save_secs,
        # MonitoredTrainingSession only supports a dir
        checkpoint_restore_path=args.checkpoint_restore_dir,
        checkpoint_save_dir=args.checkpoint_save_dir,
        export_onnx_graph=args.export_onnx_graph,
        apply_stop_condition=args.apply_stop_condition)

    # we assume that only the evaluation workers are rendering
    graph_manager.visualization_parameters.render = args.render and evaluation_worker

    worker_process = Process(target=start_graph, args=(graph_manager, dist_params))
    # NOTE: intentionally not a daemon process
    worker_process.start()
    return worker_process
dir_prefix = args.dir_prefix preset = args.preset levels = args.level.split(',') if args.level is not None else [None] num_seeds = args.seeds num_workers = args.num_workers gpu = [int(gpu) for gpu in args.gpu.split(',')] level_as_sub_dir = args.level_as_sub_dir processes = [] gpu_list = force_list(gpu) curr_gpu_idx = 0 for level in levels: for seed in range(num_seeds): # select the next gpu for this run set_gpu(gpu_list[curr_gpu_idx]) command = [ 'python3', 'rl_coach/coach.py', '-ns', '-p', '{}'.format(preset), '--seed', '{}'.format(seed), '-n', '{}'.format(num_workers) ] if dir_prefix != "": dir_prefix += "_" if args.use_cpu: command.append("-c") if args.evaluation_worker: command.append("-ew") if level is not None: command.extend(['-lvl', '{}'.format(level)]) if level_as_sub_dir: