def init_backend_engine():
  """
  Initializes ``engine``, which is either :class:`TFEngine.Engine` or Theano :class:`Engine.Engine`.

  Side effects (no return value):

  * Selects the backend via :func:`BackendEngine.select_engine` using the module-global ``config``.
  * Theano backend: applies Theano monkey patches.
  * TensorFlow backend: honors the ``TF_DEVICE`` env var (overrides ``device`` in config),
    sets up Horovod (incl. per-rank GPU pinning), TF thread pools, and distributed TF.
  * Any other backend: raises :class:`NotImplementedError`.

  NOTE(review): relies on module-level ``config``, ``log``, ``os`` and the various
  ``describe_*``/``get_tensorflow_version_tuple`` helpers being in scope — presumably
  defined earlier in this file.
  """
  BackendEngine.select_engine(config=config)
  if BackendEngine.is_theano_selected():
    print("Theano:", describe_theano_version(), file=log.v3)
    import returnn.theano.util
    returnn.theano.util.monkey_patches()
  elif BackendEngine.is_tensorflow_selected():
    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    # Major version 0 means TF < 1.0, which is not supported.
    if get_tensorflow_version_tuple()[0] == 0:
      print("Warning: TF <1.0 is not supported and likely broken.", file=log.v2)
    # TF_DEVICE env var takes precedence over the "device" config option.
    if os.environ.get("TF_DEVICE"):
      print("Devices: Use %s via TF_DEVICE instead of %s." % (
        os.environ.get("TF_DEVICE"), config.opt_typed_value("device")), file=log.v4)
      config.set("device", os.environ.get("TF_DEVICE"))
    if config.is_true("use_horovod"):
      import returnn.tf.horovod
      hvd = returnn.tf.horovod.get_ctx(config=config)
      import socket
      if "gpu" in config.value("device", "") or os.environ.get("CUDA_VISIBLE_DEVICES", ""):
        # We assume that we want to use a GPU.
        # Pin this Horovod rank to a single local GPU via TF's visible_device_list.
        gpu_opts = config.typed_dict.setdefault("tf_session_opts", {}).setdefault("gpu_options", {})
        # An explicit visible_device_list in the config would conflict with the per-rank pinning here.
        assert "visible_device_list" not in gpu_opts
        gpu_opts["visible_device_list"] = str(hvd.local_rank())
        print("Horovod: Hostname %s, pid %i, using GPU %s." % (
          socket.gethostname(), os.getpid(), gpu_opts["visible_device_list"]), file=log.v3)
      else:
        if hvd.rank() == 0:  # Don't spam in all ranks.
          print("Horovod: Not using GPU.", file=log.v3)
      if hvd.rank() == 0:  # Don't spam in all ranks.
        print("Horovod: Reduce type:", hvd.get_reduce_type(), file=log.v3)
    from returnn.tf.util.basic import debug_register_better_repr, setup_tf_thread_pools, print_available_devices
    tf_session_opts = config.typed_value("tf_session_opts", {})
    assert isinstance(tf_session_opts, dict)
    # This must be done after the Horovod logic, such that we only touch the devices we are supposed to touch.
    setup_tf_thread_pools(log_file=log.v3, tf_session_opts=tf_session_opts)
    # Print available devices. Also make sure that get_tf_list_local_devices uses the correct TF session opts.
    print_available_devices(tf_session_opts=tf_session_opts, file=log.v2)
    from returnn.tf.native_op import OpMaker
    # Route native-op build logging through the RETURNN log.
    OpMaker.log_stream = log.v3
    debug_register_better_repr()
    if config.is_true("distributed_tf"):
      import returnn.tf.distributed
      returnn.tf.distributed.init_distributed_tf(config)
  else:
    # Neither Theano nor TensorFlow was selected as backend.
    raise NotImplementedError
def main():
  """
  Main entry.

  Parses command-line options, applies ``opt=value`` overrides to the
  module-global ``base_settings``, then runs the LSTM benchmark for every
  selected cell type on GPU and/or CPU and prints a summary sorted by runtime.
  """
  global LstmCellTypes
  print("Benchmarking LSTMs.")
  better_exchook.install()
  print("Args:", " ".join(sys.argv))

  parser = ArgumentParser()
  parser.add_argument("cfg", nargs="*", help="opt=value, opt in %r" % sorted(base_settings.keys()))
  parser.add_argument("--no-cpu", action="store_true")
  parser.add_argument("--no-gpu", action="store_true")
  parser.add_argument("--selected", help="comma-separated list from %r" % LstmCellTypes)
  parser.add_argument("--no-setup-tf-thread-pools", action="store_true")
  args = parser.parse_args()

  # Apply opt=value overrides, coercing each value to the type of its current default.
  for assignment in args.cfg:
    name, raw_value = assignment.split("=", 1)
    assert name in base_settings
    base_settings[name] = type(base_settings[name])(raw_value)

  print("Settings:")
  pprint(base_settings)

  log.initialize(verbosity=[4])
  print("Returnn:", describe_returnn_version(), file=log.v3)
  print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
  print("Python:", sys.version.replace("\n", ""), sys.platform)

  if args.no_setup_tf_thread_pools:
    print(
      "Not setting up the TF thread pools. Will be done automatically by TF to number of CPU cores."
    )
  else:
    setup_tf_thread_pools(log_file=log.v2)

  if args.no_gpu:
    print("GPU will not be used.")
  else:
    print("GPU available: %r" % is_gpu_available())
  print_available_devices()

  if args.selected:
    LstmCellTypes = args.selected.split(",")

  # Run benchmarks, keyed by "GPU:<cell>" / "CPU:<cell>".
  benchmarks = {}
  if not args.no_gpu and is_gpu_available():
    for cell_type in LstmCellTypes:
      benchmarks["GPU:" + cell_type] = benchmark(lstm_unit=cell_type, use_gpu=True)
  if not args.no_cpu:
    for cell_type in LstmCellTypes:
      if cell_type in GpuOnlyCellTypes:
        continue
      benchmarks["CPU:" + cell_type] = benchmark(lstm_unit=cell_type, use_gpu=False)

  print("-" * 20)
  print("Settings:")
  pprint(base_settings)
  print("Final results:")
  # Order the summary by elapsed time (then by cell name on ties).
  by_time = sorted((elapsed, unit) for (unit, elapsed) in sorted(benchmarks.items()))
  for elapsed, unit in by_time:
    print(" %s: %s" % (unit, hms_fraction(elapsed)))
  print("Done.")