def _thread_main(self):
    """
    Body of the data provider thread.

    As long as ``self.batches`` has more entries and ``self.coord`` does not request a stop,
    fetches the next batch via ``self.get_next_batch(consider_batch_slice=True)`` and,
    if it is not None, hands it over to ``self.queue`` (when set) or else to ``self.tf_queue``.
    After each iteration, everyone waiting on ``self.state_change_cond`` is woken up
    and ``self.batches`` is advanced by one.

    When the loop ends without an exception, ``self.reached_end`` is updated.
    Exceptions are reported (log + ``sys.excepthook``) and not re-raised.
    In any case, ``self.thread_finished`` is set and the waiters are notified at the end.
    """
    try:
        from returnn.util import better_exchook

        better_exchook.install()

        while self.batches.has_more() and not self.coord.should_stop():
            enqueue_args = self.get_next_batch(consider_batch_slice=True)
            if enqueue_args is not None:
                if self.queue:
                    self.queue.put(enqueue_args)
                else:
                    self.tf_queue.enqueue(tf_session=self.tf_session, data=enqueue_args)
            # ``notify_all`` replaces the camelCase alias ``notifyAll``, which is deprecated since Python 3.10.
            # NOTE(review): assumes state_change_cond is a threading.Condition (or API-compatible) -- confirm.
            with self.state_change_cond:
                self.state_change_cond.notify_all()
            self.batches.advance(1)

        self.reached_end = not self.batches.has_more()

    except Exception as exc:
        # This is the top level of the thread, so report here instead of letting the thread die silently.
        print("Exception in DataProvider thread: %r" % exc, file=log.v1)
        sys.excepthook(*sys.exc_info())

    finally:
        # Always signal termination, so that nobody waits forever on the condition.
        with self.state_change_cond:
            self.thread_finished = True
            self.state_change_cond.notify_all()
def checkExec():
    """
    Child-side entry hook for ``ExecingProcess``.

    Returns immediately (None) unless ``--forkExecProc`` is present in ``sys.argv``.
    Otherwise, the two arguments following that flag are taken as the write and read
    file descriptor numbers (in that order). The process name, the target callable and
    its arguments are unpickled from the read end, the target is called, and its return
    value is pickled to the write end. This code path always ends with ``SystemExit``.
    """
    flag = "--forkExecProc"
    if flag not in sys.argv:
        return

    try:
        from returnn.util import better_exchook
    except ImportError:
        pass  # Doesn't matter.
    else:
        better_exchook.install()

    flag_pos = sys.argv.index(flag)
    write_fd = int(sys.argv[flag_pos + 1])
    read_fd = int(sys.argv[flag_pos + 2])
    read_end = os.fdopen(read_fd, "rb")
    write_end = os.fdopen(write_fd, "wb")

    unpickler = Unpickler(read_end)
    proc_name = unpickler.load()
    if ExecingProcess.Verbose:
        print("ExecingProcess child %s (pid %i)" % (proc_name, os.getpid()))
    try:
        target = unpickler.load()
        target_args = unpickler.load()
    except EOFError:
        print("Error: unpickle incomplete")
        raise SystemExit

    result = target(*target_args)
    sys.exited = True

    # IOError is probably broken pipe. That probably means that the parent died.
    # All of the following steps are best-effort, each one independently of the others.
    final_steps = (
        lambda: Pickler(write_end).dump(result),
        read_end.close,
        write_end.close,
    )
    for step in final_steps:
        try:
            step()
        except IOError:
            pass

    if ExecingProcess.Verbose:
        print("ExecingProcess child %s (pid %i) finished" % (proc_name, os.getpid()))
    raise SystemExit
def setup():
    """
    Calls necessary setups.

    In this order: disables the ``tensorflow`` logger, sets up the RETURNN environment,
    installs the thread join hack and the better exception hook, initializes the log,
    registers the TF debug repr (only when TensorFlow can be imported),
    and finally installs the segfault handler and enables ``faulthandler``.
    """
    # Disable extensive TF debug verbosity. Must come before the first TF import.
    import logging

    tf_logger = logging.getLogger("tensorflow")
    tf_logger.disabled = True
    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
    # logging.getLogger("tensorflow").setLevel(logging.INFO)

    # Get us some further useful debug messages (in some cases, e.g. CUDA).
    # For example: https://github.com/tensorflow/tensorflow/issues/24496
    # os.environ["CUDNN_LOGINFO_DBG"] = "1"
    # os.environ["CUDNN_LOGDEST_DBG"] = "stdout"
    # The following might fix (workaround): Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
    # (https://github.com/tensorflow/tensorflow/issues/24496).
    # os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

    import _setup_returnn_env  # noqa

    import returnn.util.basic as util

    util.init_thread_join_hack()

    from returnn.util import better_exchook

    better_exchook.install()
    better_exchook.replace_traceback_format_tb()

    from returnn.log import log

    log.initialize(verbosity=[5])

    # TF is optional.
    # Note that importing TF still has a small side effect:
    # BackendEngine._get_default_engine() will return TF by default, if TF is already loaded.
    # For most tests, this does not matter.
    try:
        import tensorflow  # noqa
    except ImportError:
        pass
    else:
        import returnn.tf.util.basic as tf_util

        tf_util.debug_register_better_repr()

    import returnn.util.debug as debug

    debug.install_lib_sig_segfault()

    try:
        import faulthandler

        # Enable after libSigSegfault, so that we have both,
        # because faulthandler will also call the original sig handler.
        faulthandler.enable()
    except ImportError:
        print("no faulthandler")
def _demo():
    """
    Command-line demo of the FST transition op.

    Parses ``--states`` and ``--inputs`` (comma-separated ints) and ``--fst`` (FST filename),
    triggers compilation of the TF module, runs one :func:`fst_transition` in a new TF session
    with the given states/inputs, and prints inputs and outputs.
    """
    from returnn.util import better_exchook

    better_exchook.install()
    from argparse import ArgumentParser

    # Keep the name of this helper: argparse uses the name of the type callable in its error messages.
    def _make_int_list(s):
        """
        :param str s: comma-separated integers
        :rtype: list[int]
        """
        return list(map(int, s.split(",")))

    parser = ArgumentParser()
    for int_list_opt in ("--states", "--inputs"):
        parser.add_argument(int_list_opt, type=_make_int_list, default=[0])
    # Other example FST files can be found online, e.g.: https://github.com/placebokkk/gofst/tree/master/ex01
    # Or: https://github.com/zh794390558/deeplearning/tree/master/kaldi/fst/data/test
    parser.add_argument("--fst", default=returnn_dir + "/tests/lexicon_opt.fst")
    args = parser.parse_args()

    # Try to compile now.
    get_tf_mod(verbose=True)

    # Some demo.
    assert os.path.exists(args.fst)
    fst_handle = get_fst(filename=args.fst)
    states_ph = tf.compat.v1.placeholder(tf.int32, [None])
    inputs_ph = tf.compat.v1.placeholder(tf.int32, [None])
    transition_op = fst_transition(fst_handle=fst_handle, states=states_ph, inputs=inputs_ph)

    with tf.compat.v1.Session() as session:
        next_states, labels, scores = session.run(
            transition_op, feed_dict={states_ph: args.states, inputs_ph: args.inputs}
        )
        for title, value in (
            ("states:", args.states),
            ("inputs:", args.inputs),
            ("output next states:", next_states),
            ("output labels:", labels),
            ("output scores:", scores),
        ):
            print(title, value)
"[]", "[1]", "[1,2,3]", "{}", "{'a': 'b', 1: 2}", "{1}", "{1,2,3}", "None", "{'a': None, 'b':1, 'c':None, 'd':'d'}", ] for s in checks: check(s) if __name__ == "__main__": better_exchook.install() if len(sys.argv) <= 1: for k, v in sorted(globals().items()): if k.startswith("test_"): print("-" * 40) print("Executing: %s" % k) try: v() except unittest.SkipTest as exc: print("SkipTest:", exc) print("-" * 40) print("Finished all tests.") else: assert len(sys.argv) >= 2 for arg in sys.argv[1:]: print("Executing: %s" % arg)
def main():
    """
    Main entry.

    Parses the command line (``opt=value`` overrides for ``base_settings``, device and cell type selection),
    prints the environment, runs :func:`benchmark` for every selected LSTM cell type
    on GPU (if available and not disabled) and on CPU (if not disabled),
    and finally prints all results sorted by their measured value.
    """
    global LstmCellTypes
    print("Benchmarking LSTMs.")
    better_exchook.install()
    print("Args:", " ".join(sys.argv))

    parser = ArgumentParser()
    parser.add_argument("cfg", nargs="*", help="opt=value, opt in %r" % sorted(base_settings.keys()))
    parser.add_argument("--no-cpu", action="store_true")
    parser.add_argument("--no-gpu", action="store_true")
    parser.add_argument("--selected", help="comma-separated list from %r" % LstmCellTypes)
    parser.add_argument("--no-setup-tf-thread-pools", action="store_true")
    args = parser.parse_args()

    # Each override is converted to the type of the existing setting value.
    for override in args.cfg:
        key, raw_value = override.split("=", 1)
        assert key in base_settings
        convert = type(base_settings[key])
        base_settings[key] = convert(raw_value)
    print("Settings:")
    pprint(base_settings)

    log.initialize(verbosity=[4])
    print("Returnn:", describe_returnn_version(), file=log.v3)
    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    print("Python:", sys.version.replace("\n", ""), sys.platform)

    if args.no_setup_tf_thread_pools:
        print(
            "Not setting up the TF thread pools. Will be done automatically by TF to number of CPU cores."
        )
    else:
        setup_tf_thread_pools(log_file=log.v2)

    if not args.no_gpu:
        print("GPU available: %r" % is_gpu_available())
    else:
        print("GPU will not be used.")
    print_available_devices()

    if args.selected:
        LstmCellTypes = args.selected.split(",")

    benchmarks = {}
    if not args.no_gpu and is_gpu_available():
        for cell_type in LstmCellTypes:
            benchmarks["GPU:" + cell_type] = benchmark(lstm_unit=cell_type, use_gpu=True)
    if not args.no_cpu:
        for cell_type in LstmCellTypes:
            if cell_type not in GpuOnlyCellTypes:
                benchmarks["CPU:" + cell_type] = benchmark(lstm_unit=cell_type, use_gpu=False)

    print("-" * 20)
    print("Settings:")
    pprint(base_settings)
    print("Final results:")
    ranking = sorted((measured, title) for (title, measured) in sorted(benchmarks.items()))
    for measured, title in ranking:
        print(" %s: %s" % (title, hms_fraction(measured)))
    print("Done.")
def _start_child(self, epoch, get_dim_only=False):
    """
    Forks and execs the Sprint child process, and reads its init message.

    The child process closes its unused pipe ends and replaces itself via ``os.execv``
    with the command from ``self._build_sprint_args()``; if that fails, it exits with code 1.
    The parent process reads the first message from the child, which must be ``b"init"``
    together with the input/output dimensions, and passes the dimensions to ``self.set_dimensions``.
    After that, it either stops the child again (``get_dim_only``) or starts the reader thread.

    :param int|None epoch:
    :param bool get_dim_only:
    """
    assert self.child_pid is None
    assert self.reader_thread is None
    self.pipe_c2p = self._pipe_open()
    self.pipe_p2c = self._pipe_open()
    args = self._build_sprint_args()
    print("%s: epoch" % self, epoch, "exec", args, file=log.v5)

    pid = os.fork()
    if pid == 0:  # child
        # In case we are in some test environment or so, recover the original stdout/stderr.
        sys.stdin, sys.stdout, sys.stderr = sys.__stdin__, sys.__stdout__, sys.__stderr__
        from returnn.util import better_exchook

        better_exchook.install()
        # noinspection PyBroadException
        try:
            sys.stdin.close()  # Force no tty stdin.
            self.pipe_c2p[0].close()
            self.pipe_p2c[1].close()
            os.execv(args[0], args)  # Does not return if successful.
            print("%s child exec failed." % self)
        except BaseException:
            print("%s child: Error when starting Sprint %r." % (self, args))
            sys.excepthook(*sys.exc_info())
        finally:
            print("%s child: exit" % self)
            # The forked child must never continue with the code of the parent.
            # noinspection PyProtectedMember,PyUnresolvedReferences
            os._exit(1)
        return  # Not reached.

    # parent
    self.pipe_c2p[1].close()
    self.pipe_p2c[0].close()
    self.child_pid = pid

    try:
        init_signal, dims = self._read_next_raw()
        # Ignore num_segments. It can be totally different than the real number of sequences.
        input_dim, output_dim, _num_segments = dims
        assert init_signal == b"init"
        assert isinstance(input_dim, int) and isinstance(output_dim, int)
        self.set_dimensions(input_dim, output_dim)
    except Exception:
        print("%s: Sprint child process (%r) caused an exception." % (self, args), file=log.v1)
        sys.excepthook(*sys.exc_info())
        self._exit_child(wait_thread=False)
        raise Exception("%s Sprint init failed" % self)

    if get_dim_only:
        self._exit_child(wait_thread=False)
        return

    reader = Thread(target=self._reader_thread_proc, args=(pid, epoch), name="%s reader thread" % self)
    self.reader_thread = reader
    reader.daemon = True
    reader.start()
def demo():
    """
    Demo run.
    Given some learning rate file (with scores / existing lrs), will calculate how lrs would have been set,
    given some config.

    With the config option ``check_learning_rates``, the process exits with code 1
    at the first epoch where the recalculated learning rate differs from the stored one.
    """
    import sys
    from returnn.util import better_exchook

    better_exchook.install()
    import returnn.__main__ as rnn

    if not sys.argv[1:]:
        print(
            "usage: python %s [config] [other options] [++check_learning_rates 1]" % __file__)
        print((
            "example usage: "
            "python %s ++learning_rate_control newbob ++learning_rate_file newbob.data ++learning_rate 0.001"
        ) % __file__)
    rnn.init_config(command_line_options=sys.argv[1:])
    # noinspection PyProtectedMember
    rnn.config._hack_value_reading_debug()
    rnn.config.update({"log": []})
    rnn.init_log()
    rnn.init_backend_engine()
    check_lr = rnn.config.bool("check_learning_rates", False)

    from returnn.pretrain import pretrain_from_config

    pretrain = pretrain_from_config(rnn.config)
    first_non_pretrain_epoch = (pretrain.get_train_num_epochs() + 1) if pretrain else 1
    pretrain_learning_rate = None
    log.initialize(verbosity=[5])

    control = load_learning_rate_control_from_config(rnn.config)
    print("LearningRateControl: %r" % control)
    if not control.epoch_data:
        print("No epoch data so far.")
        return

    first_epoch = min(control.epoch_data.keys())
    if first_epoch != 1:
        print("Strange, first epoch from epoch data is %i." % first_epoch)
    print("Error key: %s from %r" % (
        control.get_error_key(epoch=first_epoch), control.epoch_data[first_epoch].error))
    if pretrain:
        pretrain_learning_rate = rnn.config.float(
            "pretrain_learning_rate", control.default_learning_rate)

    max_epoch = max(control.epoch_data.keys())
    for epoch in range(1, max_epoch + 2):  # all epochs [1..max_epoch+1]
        epoch_is_stored = epoch in control.epoch_data
        old_learning_rate = control.epoch_data[epoch].learning_rate if epoch_is_stored else None

        if epoch < first_non_pretrain_epoch:
            learning_rate = pretrain_learning_rate
            msg = "Pretrain epoch %i, fixed learning rate: %s (was: %s)" % (
                epoch, learning_rate, old_learning_rate)
        elif first_non_pretrain_epoch > 1 and epoch == first_non_pretrain_epoch:
            learning_rate = control.default_learning_rate
            msg = "First epoch after pretrain, epoch %i, fixed learning rate: %s (was %s)" % (
                epoch, learning_rate, old_learning_rate)
        else:
            learning_rate = control.calc_new_learning_rate_for_epoch(epoch)
            msg = "Calculated learning rate for epoch %i: %s (was: %s)" % (
                epoch, learning_rate, old_learning_rate)

        if learning_rate < control.min_learning_rate:
            learning_rate = control.min_learning_rate
            msg += ", clipped to %s" % learning_rate
        msg += ", previous relative error: %s" % control.calc_relative_error(epoch - 2, epoch - 1)
        if hasattr(control, "_calc_recent_mean_relative_error"):
            # noinspection PyProtectedMember
            msg += ", previous mean relative error: %s" % control._calc_recent_mean_relative_error(epoch)
        print(msg)

        if check_lr and old_learning_rate is not None and old_learning_rate != learning_rate:
            print("Learning rate is different in epoch %i!" % epoch)
            sys.exit(1)

        # Overwrite new learning rate so that the calculation for further learning rates stays consistent.
        if epoch in control.epoch_data:
            control.epoch_data[epoch].learning_rate = learning_rate
        else:
            control.epoch_data[epoch] = control.EpochData(learningRate=learning_rate)

    print("Finished, last stored epoch was %i." % max_epoch)
def _demo():
    """
    Command-line demo for BPE segmentation.

    With ``--input``, every whitespace-separated word of the given text is segmented:
    with ``--all``, all segmentations found by :class:`CharSyncSearch` are printed,
    otherwise a single one found by :class:`DepthFirstSearch` (sampled according to ``--breadth-prob``).
    Without ``--input``, lines are read from stdin until EOF or Ctrl+C,
    and each line is segmented by :class:`SamplingBytePairEncoder`.
    """
    import sys
    import os

    # Make the RETURNN root importable when this is executed directly as a script.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir = os.path.dirname(os.path.dirname(this_dir))
    assert os.path.exists("%s/returnn/__init__.py" % root_dir)
    sys.path.insert(0, root_dir)

    from returnn.util import better_exchook

    better_exchook.install()

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--vocab", required=True)
    parser.add_argument("--unk")
    parser.add_argument("--input", help="text. if not given, will read from stdin")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--all", action="store_true")
    parser.add_argument(
        "--breadth-prob",
        type=float,
        default=0.0,
        help="1.0 will lead to breadth-first search, 0.0 to depth-first search. other values are stochastic.",
    )
    args = parser.parse_args()

    from returnn.datasets.util.vocabulary import Vocabulary

    vocab = Vocabulary(vocab_file=args.vocab, unknown_label=None)
    rnd = numpy.random.RandomState(args.seed)

    if args.input:
        prefix_tree = PrefixTree()
        for symbol in vocab.labels:
            prefix_tree.add(symbol)

        def _sampler():
            # When this returns true, it will differ from depth-first search.
            return rnd.random_sample() <= args.breadth_prob

        for word in args.input.split():
            if not args.all:
                segmentation = DepthFirstSearch(bpe=prefix_tree, word=word, sampler=_sampler).search()
                print("%s: %s" % (word, " ".join(segmentation)))
            else:
                all_segmentations = CharSyncSearch(bpe=prefix_tree, word=word).search()
                print("%s: %s" % (word, all_segmentations))
        return

    encoder = SamplingBytePairEncoder(
        labels=vocab.labels, breadth_prob=args.breadth_prob, rnd=rnd, unknown_label=args.unk
    )
    print("Reading from stdin:")
    while True:
        try:
            raw_line = sys.stdin.readline()
            if raw_line == "":  # EOF
                return
        except KeyboardInterrupt:
            return
        print(" ".join(encoder.segment_sentence(raw_line.strip())))