def start_distributed_task(job_type, task_index, evaluation_worker=False):
    task_parameters = DistributedTaskParameters(
        framework_type="tensorflow",  # TODO: tensorflow shouldn't be hardcoded
        parameters_server_hosts=ps_hosts,
        worker_hosts=worker_hosts,
        job_type=job_type,
        task_index=task_index,
        evaluate_only=evaluation_worker,
        use_cpu=args.use_cpu,
        num_tasks=total_tasks,  # training tasks + 1 evaluation task
        num_training_tasks=args.num_workers,
        experiment_path=args.experiment_path,
        shared_memory_scratchpad=None,
        seed=args.seed + task_index if args.seed is not None else None)  # each worker gets a different seed
    task_parameters.__dict__ = add_items_to_dict(task_parameters.__dict__, args.__dict__)

    # we assume that only the evaluation workers are rendering
    graph_manager.visualization_parameters.render = args.render and evaluation_worker
    start_graph(graph_manager, task_parameters)
    # p = Process(target=start_graph, args=(graph_manager, task_parameters))
    # p.start()
    return
def start_distributed_task(job_type, task_index, evaluation_worker=False,
                           shared_memory_scratchpad=shared_memory_scratchpad):
    task_parameters = DistributedTaskParameters(
        framework_type=args.framework,
        parameters_server_hosts=ps_hosts,
        worker_hosts=worker_hosts,
        job_type=job_type,
        task_index=task_index,
        evaluate_only=0 if evaluation_worker else None,  # 0 value for evaluation worker as it should run infinitely
        use_cpu=args.use_cpu,
        num_tasks=total_tasks,  # training tasks + 1 evaluation task
        num_training_tasks=args.num_workers,
        experiment_path=args.experiment_path,
        shared_memory_scratchpad=shared_memory_scratchpad,
        seed=args.seed + task_index if args.seed is not None else None,  # each worker gets a different seed
        checkpoint_save_secs=args.checkpoint_save_secs,
        checkpoint_restore_path=args.checkpoint_restore_dir,  # MonitoredTrainingSession only supports a dir
        checkpoint_save_dir=args.checkpoint_save_dir,
        export_onnx_graph=args.export_onnx_graph,
        apply_stop_condition=args.apply_stop_condition)

    # we assume that only the evaluation workers are rendering
    graph_manager.visualization_parameters.render = args.render and evaluation_worker
    p = Process(target=start_graph, args=(graph_manager, task_parameters))
    # p.daemon = True
    p.start()
    return p
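
# The snippet below is an illustrative sketch, not the repository's exact launch
# code: it shows one way start_distributed_task (the second version above, which
# returns the spawned Process) might be driven to start one parameter server,
# args.num_workers training workers, and a single evaluation worker on the last
# task index. It assumes `args` and `Process` (multiprocessing.Process) are
# available from the enclosing scope, as they are for the helper above.

# parameter server process for the distributed run
parameter_server = Process(target=start_distributed_task, args=("ps", 0))
parameter_server.start()

# one training worker process per task index
workers = [start_distributed_task("worker", task_index)
           for task_index in range(args.num_workers)]

# a single evaluation worker, placed after the training task indices
evaluation_worker_process = start_distributed_task("worker", args.num_workers,
                                                   evaluation_worker=True)

# wait for the training workers to finish
[w.join() for w in workers]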