def main():
  """
  Stand-alone tool: prints the TF devices visible to this process,
  optionally restricted via ``--visible_device_list``, and with
  ``--try_subsets`` shrinks ``CUDA_VISIBLE_DEVICES`` step by step to check
  whether TF caches the result of ``list_local_devices``.
  """
  arg_parser = ArgumentParser()
  arg_parser.add_argument("--try_subsets", action="store_true")
  arg_parser.add_argument("--visible_device_list")
  arg_parser.add_argument("--use_device_lib", action="store_true")
  args = arg_parser.parse_args()
  orig_cuda_visible_devs_str = os.environ.get("CUDA_VISIBLE_DEVICES", None)
  print("original CUDA_VISIBLE_DEVICES:", orig_cuda_visible_devs_str)
  tf_session_opts = {}
  if args.visible_device_list:
    tf_session_opts.setdefault("gpu_options", {})["visible_device_list"] = args.visible_device_list
    print("Using TF gpu_options.visible_device_list %r" % args.visible_device_list)
  setup_tf_thread_pools(tf_session_opts=tf_session_opts)
  print_available_devices(tf_session_opts=tf_session_opts)
  dump_devs(tf_session_opts=tf_session_opts, use_device_lib=args.use_device_lib, filter_gpu=False)
  if args.try_subsets:
    if orig_cuda_visible_devs_str is None:
      # Bug fix: CUDA_VISIBLE_DEVICES was never set. Restoring it via
      # os.environ["CUDA_VISIBLE_DEVICES"] = None would raise TypeError
      # (environ values must be str), so skip the subset test entirely.
      print("CUDA_VISIBLE_DEVICES is not set, cannot try subsets of it.")
    else:
      print("Trying subsets of CUDA_VISIBLE_DEVICES to see whether list_local_devices is cached.")
      cuda_visible_devs_str = orig_cuda_visible_devs_str
      while cuda_visible_devs_str:
        # Drop the last device from the comma-separated list each iteration.
        cuda_visible_devs_str = ",".join(cuda_visible_devs_str.split(",")[:-1])
        print("set CUDA_VISIBLE_DEVICES:", cuda_visible_devs_str)
        os.environ["CUDA_VISIBLE_DEVICES"] = cuda_visible_devs_str
        dump_devs(tf_session_opts=tf_session_opts, use_device_lib=args.use_device_lib)
      os.environ["CUDA_VISIBLE_DEVICES"] = orig_cuda_visible_devs_str
      print("Recovered original CUDA_VISIBLE_DEVICES")
      dump_devs(tf_session_opts=tf_session_opts, use_device_lib=args.use_device_lib)
  print("Quit.")
def initBackendEngine():
  """
  Selects the backend engine (Theano or TensorFlow) based on the global ``config``
  and performs backend-specific initialization: version logging, device selection
  via the ``TF_DEVICE`` env var, Horovod setup, and TF thread-pool setup.
  Mutates the global ``config`` and writes to the global ``log``.

  :raises NotImplementedError: if neither Theano nor TensorFlow is selected
  """
  BackendEngine.select_engine(config=config)
  if BackendEngine.is_theano_selected():
    print("Theano:", describe_theano_version(), file=log.v3)
    import TheanoUtil
    TheanoUtil.monkey_patches()
  elif BackendEngine.is_tensorflow_selected():
    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    if get_tensorflow_version_tuple()[0] == 0:
      print("Warning: TF <1.0 is not supported and likely broken.", file=log.v2)
    # The TF_DEVICE env var takes precedence over the configured "device" option.
    if os.environ.get("TF_DEVICE"):
      print("Devices: Use %s via TF_DEVICE instead of %s." % (
        os.environ.get("TF_DEVICE"), config.opt_typed_value("device")), file=log.v4)
      config.set("device", os.environ.get("TF_DEVICE"))
    if config.is_true("use_horovod"):
      import socket
      import horovod.tensorflow as hvd
      from TFUtil import init_horovod
      init_horovod()  # make sure it is initialized
      if "gpu" in config.value("device", "") or os.environ.get("CUDA_VISIBLE_DEVICES", ""):
        # We assume that we want to use a GPU.
        # Each Horovod rank gets exactly one GPU, selected by its local rank.
        gpu_opts = config.typed_dict.setdefault("tf_session_opts", {}).setdefault("gpu_options", {})
        assert "visible_device_list" not in gpu_opts
        gpu_opts["visible_device_list"] = str(hvd.local_rank())
        print("Horovod: Hostname %s, pid %i, using GPU %s." % (
          socket.gethostname(), os.getpid(), gpu_opts["visible_device_list"]), file=log.v3)
      else:
        if hvd.rank() == 0:  # Don't spam in all ranks.
          print("Horovod: Not using GPU.", file=log.v3)
      # Normalize the reduce type: empty means the default "grad".
      horovod_reduce_type = config.value("horovod_reduce_type", "")
      if horovod_reduce_type == "":
        horovod_reduce_type = "grad"
        config.set("horovod_reduce_type", horovod_reduce_type)
      else:
        assert horovod_reduce_type in ["grad", "param"], "config option 'horovod_reduce_type' invalid"
      if hvd.rank() == 0:  # Don't spam in all ranks.
        print("Horovod: Reduce type:", horovod_reduce_type, file=log.v3)
    from TFUtil import debugRegisterBetterRepr, setup_tf_thread_pools, print_available_devices
    tf_session_opts = config.typed_value("tf_session_opts", {})
    assert isinstance(tf_session_opts, dict)
    # This must be done after the Horovod logic, such that we only touch the devices we are supposed to touch.
    setup_tf_thread_pools(log_file=log.v3, tf_session_opts=tf_session_opts)
    # Print available devices. Also make sure that get_tf_list_local_devices uses the correct TF session opts.
    print_available_devices(tf_session_opts=tf_session_opts, file=log.v2)
    debugRegisterBetterRepr()
  else:
    raise NotImplementedError
def _check_devices(self):
  """
  Prints the available TF devices and validates the device configuration:
  exactly one device must be configured, and if a GPU was requested, one
  must actually be available. Warns if a GPU exists but device=cpu is set.
  """
  from TFUtil import print_available_devices, is_gpu_available
  print_available_devices()
  assert len(self.devices_config) == 1, "multiple devices not supported yet for TF"
  if self.is_requesting_for_gpu():
    assert is_gpu_available(), "no GPU available"
  elif is_gpu_available():
    print("Note: There is a GPU available but you have set device=cpu.", file=log.v2)
def main():
  """
  Benchmarks every LSTM cell implementation, on GPU (unless ``--no-gpu`` or
  none available) and on CPU (unless ``--no-cpu``), then prints a summary
  sorted by run time. Positional args of the form ``opt=value`` override
  ``base_settings``; ``--selected`` restricts the cell types to benchmark.
  """
  global LstmCellTypes
  print("Benchmarking LSTMs.")
  better_exchook.install()
  print("Args:", " ".join(sys.argv))
  arg_parser = ArgumentParser()
  arg_parser.add_argument("cfg", nargs="*", help="opt=value, opt in %r" % sorted(base_settings.keys()))
  arg_parser.add_argument("--no-cpu", action="store_true")
  arg_parser.add_argument("--no-gpu", action="store_true")
  arg_parser.add_argument("--selected", help="comma-separated list from %r" % LstmCellTypes)
  arg_parser.add_argument("--no-setup-tf-thread-pools", action="store_true")
  args = arg_parser.parse_args()
  # Apply opt=value overrides, coercing each value to the type of its default.
  for cfg_entry in args.cfg:
    opt_name, opt_value = cfg_entry.split("=", 1)
    assert opt_name in base_settings
    base_settings[opt_name] = type(base_settings[opt_name])(opt_value)
  print("Settings:")
  pprint(base_settings)
  log.initialize(verbosity=[4])
  print("Returnn:", describe_crnn_version(), file=log.v3)
  print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
  print("Python:", sys.version.replace("\n", ""), sys.platform)
  if args.no_setup_tf_thread_pools:
    print("Not setting up the TF thread pools. Will be done automatically by TF to number of CPU cores.")
  else:
    setup_tf_thread_pools(log_file=log.v2)
  if args.no_gpu:
    print("GPU will not be used.")
  else:
    print("GPU available: %r" % is_gpu_available())
  print_available_devices()
  if args.selected:
    LstmCellTypes = args.selected.split(",")
  benchmarks = {}
  if not args.no_gpu and is_gpu_available():
    for cell_type in LstmCellTypes:
      benchmarks["GPU:" + cell_type] = benchmark(lstm_unit=cell_type, use_gpu=True)
  if not args.no_cpu:
    for cell_type in LstmCellTypes:
      if cell_type in GpuOnlyCellTypes:
        continue  # cannot run these on CPU
      benchmarks["CPU:" + cell_type] = benchmark(lstm_unit=cell_type, use_gpu=False)
  print("-" * 20)
  print("Settings:")
  pprint(base_settings)
  print("Final results:")
  # Sort by run time (fastest first); inner sort keeps ties deterministic by name.
  for run_time, cell_name in sorted([(t, name) for (name, t) in sorted(benchmarks.items())]):
    print(" %s: %s" % (cell_name, hms_fraction(run_time)))
  print("Done.")
def main():
  """
  Benchmarks every LSTM cell implementation, on GPU (unless ``--no-gpu`` or
  none available) and on CPU (unless ``--no-cpu``), then prints a summary
  sorted by run time. Positional args of the form ``opt=value`` override
  ``base_settings``; ``--selected`` restricts the cell types to benchmark.
  """
  global LstmCellTypes
  print("Benchmarking LSTMs.")
  better_exchook.install()
  print("Args:", " ".join(sys.argv))
  arg_parser = ArgumentParser()
  arg_parser.add_argument("cfg", nargs="*", help="opt=value, opt in %r" % sorted(base_settings.keys()))
  arg_parser.add_argument("--no-cpu", action="store_true")
  arg_parser.add_argument("--no-gpu", action="store_true")
  arg_parser.add_argument("--selected", help="comma-separated list from %r" % LstmCellTypes)
  arg_parser.add_argument("--no-setup-tf-thread-pools", action="store_true")
  args = arg_parser.parse_args()
  # Apply opt=value overrides, coercing each value to the type of its default.
  for cfg_entry in args.cfg:
    opt_name, opt_value = cfg_entry.split("=", 1)
    assert opt_name in base_settings
    base_settings[opt_name] = type(base_settings[opt_name])(opt_value)
  print("Settings:")
  pprint(base_settings)
  log.initialize(verbosity=[4])
  print("Returnn:", describe_returnn_version(), file=log.v3)
  print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
  print("Python:", sys.version.replace("\n", ""), sys.platform)
  if args.no_setup_tf_thread_pools:
    print("Not setting up the TF thread pools. Will be done automatically by TF to number of CPU cores.")
  else:
    setup_tf_thread_pools(log_file=log.v2)
  if args.no_gpu:
    print("GPU will not be used.")
  else:
    print("GPU available: %r" % is_gpu_available())
  print_available_devices()
  if args.selected:
    LstmCellTypes = args.selected.split(",")
  benchmarks = {}
  if not args.no_gpu and is_gpu_available():
    for cell_type in LstmCellTypes:
      benchmarks["GPU:" + cell_type] = benchmark(lstm_unit=cell_type, use_gpu=True)
  if not args.no_cpu:
    for cell_type in LstmCellTypes:
      if cell_type in GpuOnlyCellTypes:
        continue  # cannot run these on CPU
      benchmarks["CPU:" + cell_type] = benchmark(lstm_unit=cell_type, use_gpu=False)
  print("-" * 20)
  print("Settings:")
  pprint(base_settings)
  print("Final results:")
  # Sort by run time (fastest first); inner sort keeps ties deterministic by name.
  for run_time, cell_name in sorted([(t, name) for (name, t) in sorted(benchmarks.items())]):
    print(" %s: %s" % (cell_name, hms_fraction(run_time)))
  print("Done.")
def init_backend_engine():
  """
  Initializes ``engine``, which is either :class:`TFEngine.Engine` or Theano :class:`Engine.Engine`.

  Backend-specific initialization: version logging, device selection via the
  ``TF_DEVICE`` env var, Horovod setup, and TF thread-pool setup.
  Mutates the global ``config`` and writes to the global ``log``.

  :raises NotImplementedError: if neither Theano nor TensorFlow is selected
  """
  BackendEngine.select_engine(config=config)
  if BackendEngine.is_theano_selected():
    print("Theano:", describe_theano_version(), file=log.v3)
    import TheanoUtil
    TheanoUtil.monkey_patches()
  elif BackendEngine.is_tensorflow_selected():
    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    if get_tensorflow_version_tuple()[0] == 0:
      print("Warning: TF <1.0 is not supported and likely broken.", file=log.v2)
    # The TF_DEVICE env var takes precedence over the configured "device" option.
    if os.environ.get("TF_DEVICE"):
      print("Devices: Use %s via TF_DEVICE instead of %s." % (
        os.environ.get("TF_DEVICE"), config.opt_typed_value("device")), file=log.v4)
      config.set("device", os.environ.get("TF_DEVICE"))
    if config.is_true("use_horovod"):
      import socket
      # noinspection PyPackageRequirements,PyUnresolvedReferences
      import horovod.tensorflow as hvd
      from TFUtil import init_horovod
      init_horovod()  # make sure it is initialized
      if "gpu" in config.value("device", "") or os.environ.get("CUDA_VISIBLE_DEVICES", ""):
        # We assume that we want to use a GPU.
        # Each Horovod rank gets exactly one GPU, selected by its local rank.
        gpu_opts = config.typed_dict.setdefault("tf_session_opts", {}).setdefault("gpu_options", {})
        assert "visible_device_list" not in gpu_opts
        gpu_opts["visible_device_list"] = str(hvd.local_rank())
        print("Horovod: Hostname %s, pid %i, using GPU %s." % (
          socket.gethostname(), os.getpid(), gpu_opts["visible_device_list"]), file=log.v3)
      else:
        if hvd.rank() == 0:  # Don't spam in all ranks.
          print("Horovod: Not using GPU.", file=log.v3)
      # Normalize the reduce type: empty means the default "grad".
      horovod_reduce_type = config.value("horovod_reduce_type", "")
      if horovod_reduce_type == "":
        horovod_reduce_type = "grad"
        config.set("horovod_reduce_type", horovod_reduce_type)
      else:
        assert horovod_reduce_type in ["grad", "param"], "config option 'horovod_reduce_type' invalid"
      if hvd.rank() == 0:  # Don't spam in all ranks.
        print("Horovod: Reduce type:", horovod_reduce_type, file=log.v3)
    from TFUtil import debug_register_better_repr, setup_tf_thread_pools, print_available_devices
    tf_session_opts = config.typed_value("tf_session_opts", {})
    assert isinstance(tf_session_opts, dict)
    # This must be done after the Horovod logic, such that we only touch the devices we are supposed to touch.
    setup_tf_thread_pools(log_file=log.v3, tf_session_opts=tf_session_opts)
    # Print available devices. Also make sure that get_tf_list_local_devices uses the correct TF session opts.
    print_available_devices(tf_session_opts=tf_session_opts, file=log.v2)
    debug_register_better_repr()
  else:
    raise NotImplementedError