Ejemplo n.º 1
0
 def _check_devices(self):
   """
   Sanity-check the configured devices against what TF can actually see.
   Prints the available devices, enforces the current single-device
   limit, and warns when a GPU exists but device=cpu was chosen.
   """
   from TFUtil import print_available_devices, is_gpu_available
   print_available_devices()
   assert len(self.devices_config) == 1, "multiple devices not supported yet for TF"
   if self.is_requesting_for_gpu():
     assert is_gpu_available(), "no GPU available"
   elif is_gpu_available():
     print("Note: There is a GPU available but you have set device=cpu.", file=log.v2)
Ejemplo n.º 2
0
def main():
  """
  Benchmark entry point: parse "opt=value" overrides and CLI flags, set up
  logging and the TF thread pools, time every selected LSTM cell type on
  GPU and/or CPU, and print a sorted timing summary.
  """
  global LstmCellTypes
  print("Benchmarking LSTMs.")
  better_exchook.install()
  print("Args:", " ".join(sys.argv))
  parser = ArgumentParser()
  parser.add_argument("cfg", nargs="*", help="opt=value, opt in %r" % sorted(base_settings.keys()))
  parser.add_argument("--no-cpu", action="store_true")
  parser.add_argument("--no-gpu", action="store_true")
  parser.add_argument("--selected", help="comma-separated list from %r" % LstmCellTypes)
  parser.add_argument("--no-setup-tf-thread-pools", action="store_true")
  args = parser.parse_args()
  # Apply "opt=value" overrides, coercing each value to the type of the
  # existing default in base_settings.
  for item in args.cfg:
    name, raw_value = item.split("=", 1)
    assert name in base_settings
    base_settings[name] = type(base_settings[name])(raw_value)
  print("Settings:")
  pprint(base_settings)

  log.initialize(verbosity=[4])
  print("Returnn:", describe_crnn_version(), file=log.v3)
  print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
  print("Python:", sys.version.replace("\n", ""), sys.platform)
  if args.no_setup_tf_thread_pools:
    print("Not setting up the TF thread pools. Will be done automatically by TF to number of CPU cores.")
  else:
    setup_tf_thread_pools(log_file=log.v2)
  if args.no_gpu:
    print("GPU will not be used.")
  else:
    print("GPU available: %r" % is_gpu_available())
  print_available_devices()

  if args.selected:
    LstmCellTypes = args.selected.split(",")
  results = {}
  if not args.no_gpu and is_gpu_available():
    for cell_type in LstmCellTypes:
      results["GPU:" + cell_type] = benchmark(lstm_unit=cell_type, use_gpu=True)
  if not args.no_cpu:
    for cell_type in LstmCellTypes:
      if cell_type in GpuOnlyCellTypes:
        continue  # this cell has no CPU implementation
      results["CPU:" + cell_type] = benchmark(lstm_unit=cell_type, use_gpu=False)

  print("-" * 20)
  print("Settings:")
  pprint(base_settings)
  print("Final results:")
  # Report fastest first (sorted by elapsed time, names as tiebreaker).
  for elapsed, cell_name in sorted((t, u) for (u, t) in sorted(results.items())):
    print("  %s: %s" % (cell_name, hms_fraction(elapsed)))
  print("Done.")
Ejemplo n.º 3
0
def main():
  """
  CLI entry point for the LSTM benchmark: applies "opt=value" settings
  overrides, initializes logging and TF thread pools, runs the benchmark
  for each selected cell type, then prints the timings sorted by speed.
  """
  global LstmCellTypes
  print("Benchmarking LSTMs.")
  better_exchook.install()
  print("Args:", " ".join(sys.argv))
  arg_parser = ArgumentParser()
  arg_parser.add_argument("cfg", nargs="*", help="opt=value, opt in %r" % sorted(base_settings.keys()))
  arg_parser.add_argument("--no-cpu", action="store_true")
  arg_parser.add_argument("--no-gpu", action="store_true")
  arg_parser.add_argument("--selected", help="comma-separated list from %r" % LstmCellTypes)
  arg_parser.add_argument("--no-setup-tf-thread-pools", action="store_true")
  args = arg_parser.parse_args()
  for assignment in args.cfg:
    opt_name, opt_value = assignment.split("=", 1)
    assert opt_name in base_settings
    # Coerce the string value to the type of the current default.
    base_settings[opt_name] = type(base_settings[opt_name])(opt_value)
  print("Settings:")
  pprint(base_settings)

  log.initialize(verbosity=[4])
  print("Returnn:", describe_returnn_version(), file=log.v3)
  print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
  print("Python:", sys.version.replace("\n", ""), sys.platform)
  if not args.no_setup_tf_thread_pools:
    setup_tf_thread_pools(log_file=log.v2)
  else:
    print("Not setting up the TF thread pools. Will be done automatically by TF to number of CPU cores.")
  print("GPU will not be used." if args.no_gpu else "GPU available: %r" % is_gpu_available())
  print_available_devices()

  if args.selected:
    LstmCellTypes = args.selected.split(",")
  timings = {}
  if not args.no_gpu and is_gpu_available():
    for unit in LstmCellTypes:
      timings["GPU:" + unit] = benchmark(lstm_unit=unit, use_gpu=True)
  if not args.no_cpu:
    for unit in LstmCellTypes:
      if unit in GpuOnlyCellTypes:
        continue  # GPU-only cell, skip on CPU
      timings["CPU:" + unit] = benchmark(lstm_unit=unit, use_gpu=False)

  print("-" * 20)
  print("Settings:")
  pprint(base_settings)
  print("Final results:")
  by_time = sorted([(t, unit) for (unit, t) in sorted(timings.items())])
  for t, unit in by_time:
    print("  %s: %s" % (unit, hms_fraction(t)))
  print("Done.")
Ejemplo n.º 4
0
 def _get_devices_config(self):
   """
   Build the per-device init arguments from the config.  When no "device"
   option is set, defaults to "gpu" if one is available.

   :rtype: list[dict[str]]
   """
   from Device import getDevicesInitArgs
   device = self.config.value("device", None)
   if not device:
     # No explicit choice by the user -- prefer the GPU when we have one.
     from TFUtil import is_gpu_available
     if is_gpu_available():
       print("Device not set explicitly, and we found a GPU, which we will use.", file=log.v2)
       self.config.set("device", "gpu")
     else:
       print("Device not set explicitly, and no GPU found.", file=log.v2)
   return getDevicesInitArgs(self.config)
Ejemplo n.º 5
0
def test_GradOfLstmGenericBase_simple_nan():
    """
    Builds the native LSTM op, runs it once on trivial zero inputs, then
    feeds demo argument sets through the LSTM gradient op and asserts all
    of its outputs are finite (no NaN/inf).
    """
    print("test_GradOfLstmGenericBase_simple_nan()")
    print("GPU available:", is_gpu_available())
    print("Create LSTM op...")
    from TFNativeOp import make_lstm_op
    op_func = make_lstm_op(compiler_opts=dict(verbose=True))
    print("op_func:", op_func)

    def _zero_input_call():
        # Minimal 1x1x1 invocation: all-zero inputs, all-ones index mask.
        n_time, n_batch, n_out = 1, 1, 1
        z = tf.zeros((n_time, n_batch, n_out * 4))
        v_h = tf.zeros((n_out, n_out * 4))
        c0 = tf.zeros((n_batch, n_out))
        idx = tf.ones((n_time, n_batch))
        return op_func(z, v_h, c0, idx)

    dummy = _zero_input_call()
    with tf.Session() as session:
        print("dummy out:", session.run(list(dummy)))
        grad_op = _lstm_grad_op(session)
        args = _demo_lstm_grad_args()
        placeholders = [tf.placeholder(v.dtype) for v in args]
        lstm_grad_t = list(grad_op(*placeholders))
        for kwargs in [
            {}
        ]:  # [{"factor": 0}, {"ones_like": True}, {"ones_like": True, "factor": -1}, {}]:
            print("Testing lstm grad args %r." % kwargs)
            args = _demo_lstm_grad_args(**kwargs)
            outs = session.run(lstm_grad_t, feed_dict=dict(zip(placeholders, args)))
            for i, (out, descr) in enumerate(zip(outs, ["z", "out_v_h", "out_c", "dummy_out"])):
                assert isinstance(out, numpy.ndarray)
                print("(%i) %s:" % (i, descr))
                print(out)
            for out in outs:
                assert numpy.all(numpy.isfinite(out))
            print("Seems ok.")
        print("All ok!")
Ejemplo n.º 6
0
    print(expected)
    fsa = Fsa.fast_bw_fsa_staircase(seq_lens=[num_classes],
                                    with_loop=with_loop)
    with TFCompat.v1.Session().as_default():
        res = tf_baum_welch(fsa,
                            num_classes=num_classes,
                            out_seq_len=out_seq_len)
    print("baum-welch:")
    print(res)
    is_close = numpy.isclose(expected, res).all()
    print("close:", is_close)
    assert is_close


# Note: we could replace tf_baum_welch by some CPU/Python code...
@unittest.skipIf(not is_gpu_available(),
                 "no gpu on this system; needed for tf_baum_welch")
def test_fast_bw_fsa_staircase():
    """Run the staircase full-sum check over a small grid of sizes."""
    for num_classes, out_seq_len in [(2, 2), (3, 2), (3, 3)]:
        check_fast_bw_fsa_staircase(num_classes, out_seq_len, with_loop=False)
        check_fast_bw_fsa_staircase(num_classes, out_seq_len, with_loop=True)


if __name__ == "__main__":
    import better_exchook
    better_exchook.install()
    if len(sys.argv) <= 1:
        for k, v in sorted(globals().items()):
Ejemplo n.º 7
0
    config = Config()
    config.update({
      "num_outputs": 3,
      "num_inputs": 4,
      "network": {
        "output": {"class": "rec", "target": "classes", "unit": {
          "prob": {"class": "softmax", "from": ["prev:output"], "loss": "ce", "target": "classes"},
          "output": {"class": "choice", "beam_size": 4, "from": ["prob"], "target": "classes", "initial_output": 0}
        }},
      }
    })
    network = TFNetwork(config=config, train_flag=True)
    network.construct_from_dict(config.typed_dict["network"])


@unittest.skipIf(not is_gpu_available(), "no gpu on this system")
def test_RecLayer_get_cudnn_params_size():
  """
  Checks RecLayer._get_cudnn_param_size against the parameter count
  reported by the cuDNN RNN op itself.
  """
  from tensorflow.contrib.cudnn_rnn.ops.gen_cudnn_rnn_ops import cudnn_rnn_params_size

  def check(num_units, input_size,
            rnn_mode="lstm", num_layers=1, direction="unidirectional", input_mode="linear_input",
            T=tf.float32, S=tf.int32):
    # Ask cuDNN for the size, then compare against our own formula.
    kwargs = dict(
      rnn_mode=rnn_mode, num_units=num_units, input_size=input_size,
      num_layers=num_layers, direction=direction, input_mode=input_mode)
    expected = cudnn_rnn_params_size(T=T, S=S, **kwargs)[0].eval()
    assert_equal(expected, RecLayer._get_cudnn_param_size(**kwargs))

  with tf.Session():
    check(rnn_mode="lstm", num_units=5, input_size=3)
Ejemplo n.º 8
0
  for t in range(n_time):
    ta = ta.write(index=t, value=x[t])
  y = ta.stack()
  y.set_shape(tf.TensorShape((n_time, n_dim)))
  # y = y[::1]  -- if you add this, the test passes
  dx, = tf.gradients(ys=[y], grad_ys=[dy], xs=[x])
  vx, vdy, vy, vdx = session.run([x, dy, y, dx])
  print("x:", vx)
  print("y:", vy)
  print("dy:", vdy)
  print("dx:", vdx)
  assert_allclose(vx, vy)
  assert_allclose(vdy, vdx)


@unittest.skipIf(not is_gpu_available(), "no gpu on this system")
def test_FastBaumWelch():
  print("Make op...")
  op = make_fast_baum_welch_op(compiler_opts=dict(verbose=True))  # will be cached, used inside :func:`fast_baum_welch`
  print("Op:", op)
  n_batch = 3
  seq_len = 5
  n_classes = 10
  from Fsa import FastBwFsaShared
  fsa = FastBwFsaShared()
  fsa.add_inf_loop(state_idx=0, num_emission_labels=n_classes)
  fast_bw_fsa = fsa.get_fast_bw_fsa(n_batch=n_batch)
  edges = tf.constant(fast_bw_fsa.edges, dtype=tf.int32)
  weights = tf.constant(fast_bw_fsa.weights, dtype=tf.float32)
  start_end_states = tf.constant(fast_bw_fsa.start_end_states, dtype=tf.int32)
  am_scores = tf.constant(numpy.random.normal(size=(seq_len, n_batch, n_classes)), dtype=tf.float32)  # in -log space
Ejemplo n.º 9
0
  print("check_fast_bw_fsa_staircase(%i, %i, with_loop=%r)" % (num_classes, out_seq_len, with_loop))
  expected = slow_full_sum_staircase_uniform(num_classes=num_classes, out_seq_len=out_seq_len, with_loop=with_loop)
  print("expected full sum:")
  print(expected)
  fsa = Fsa.fast_bw_fsa_staircase(seq_lens=[num_classes], with_loop=with_loop)
  with tf.Session().as_default():
    res = tf_baum_welch(fsa, num_classes=num_classes, out_seq_len=out_seq_len)
  print("baum-welch:")
  print(res)
  is_close = numpy.isclose(expected, res).all()
  print("close:", is_close)
  assert is_close


# Note: we could replace tf_baum_welch by some CPU/Python code...
@unittest.skipIf(not is_gpu_available(), "no gpu on this system; needed for tf_baum_welch")
def test_fast_bw_fsa_staircase():
  """Exercise the staircase FSA full-sum check for several problem sizes."""
  for n_classes, seq_len in ((2, 2), (3, 2), (3, 3)):
    check_fast_bw_fsa_staircase(n_classes, seq_len, with_loop=False)
    check_fast_bw_fsa_staircase(n_classes, seq_len, with_loop=True)


if __name__ == "__main__":
  import better_exchook
  better_exchook.install()
  if len(sys.argv) <= 1:
    for k, v in sorted(globals().items()):
      if k.startswith("test_"):
Ejemplo n.º 10
0
def test_RecLayer_NativeLstm_Nan():
    """
    Stress-test training of a single NativeLSTM rec layer for NaN/inf.

    Builds a tiny rec network with MSE loss, trains it with Adam on random
    uniform inputs for up to 10000 steps, and asserts the loss stays
    finite.  On the first non-finite value (or any exception during the
    update step) it dumps extensive diagnostics: the most recent
    summaries, weights, Adam updates, and the raw inputs/outputs of the
    native "GradOfLstmGenericBase" gradient op.
    """
    print("test_RecLayer_NativeLstm_Nan()")
    print("GPU available:", is_gpu_available())
    numpy.set_printoptions(precision=15)
    num_inputs = 4
    num_outputs = 3

    config = Config()
    config.update({
        "num_inputs": num_inputs,
        "num_outputs": {
            "data": [num_inputs, 2],
            "classes": [num_outputs, 2]
        },  # dense output
        "network": {
            "output": {
                "class": "rec",
                "unit": "NativeLSTM",
                "loss": "mse"
            }
        },
        "adam": True,
        # Extra debug options so the summaries/updater state inspected
        # below actually get created.
        "debug_grad_summaries": True,
        "debug_save_updater_vars": True,
        "debug_add_check_numerics_ops": True,
    })

    print("Reset default graph...")
    tf.reset_default_graph()
    print("Create network...")
    network = TFNetwork(config=config, train_flag=True)
    network.construct_from_dict(config.typed_dict["network"])

    # Depending on the seed, I get nan earlier, later, or not at all.
    # limit=5.0: seed=3 -> nan in step 4094. seed=1 -> nan in step 2463.
    random = numpy.random.RandomState(seed=1)
    limit = 10.0  # The higher, the more likely you get nan.

    def make_feed_dict(seq_len=10):
        # Single-sequence batch: random uniform data and dense targets in
        # [-limit, limit], both shaped (batch=1, seq_len, dim).
        return {
            network.extern_data.data["data"].placeholder:
            random.uniform(-limit, limit, (1, seq_len, num_inputs)),
            network.extern_data.data["data"].size_placeholder[0]:
            numpy.array([seq_len]),
            network.extern_data.data["classes"].placeholder:
            random.uniform(-limit, limit, (1, seq_len, num_outputs)),
            network.extern_data.data["classes"].size_placeholder[0]:
            numpy.array([seq_len]),
        }

    print("Creating session...")
    with tf.Session() as session:
        print("Init params...")
        network.initialize_params(session=session)
        print("Test run...")
        output_data1 = session.run(
            network.get_default_output_layer().output.placeholder,
            feed_dict=make_feed_dict(5))
        assert_equal(output_data1.shape,
                     (5, 1, num_outputs))  # (time, batch, dim)

        layer = network.layers["output"]
        loss_t = network.get_total_loss(
        ) * layer.get_loss_normalization_factor()
        weights_t = layer.params["W"]
        weights_grad_t, = tf.gradients(network.get_objective(), weights_t)

        def find_op_by_type(type_name):
            # Returns the first op of the given type in the session graph,
            # or None if no such op exists.
            for op in session.graph.get_operations():
                assert isinstance(op, tf.Operation)
                if op.type == type_name:
                    return op

        lstm_grad_op = find_op_by_type("GradOfLstmGenericBase")
        assert lstm_grad_op is not None
        lstm_grad_ins_t = list(lstm_grad_op.inputs)
        lstm_grad_outs_t = list(lstm_grad_op.outputs)
        # Stand-alone copy of the gradient op so it can be re-run with the
        # demo args (and with the captured inputs) for diagnostics.
        lstm_grad_func = _lstm_grad_op(session=session)
        demo_grad_t = lstm_grad_func(*_demo_lstm_grad_args())
        demo_grad2_input_placeholders = [
            tf.placeholder(v.dtype) for v in lstm_grad_ins_t
        ]
        demo_grad2_t = lstm_grad_func(*demo_grad2_input_placeholders)[1]

        print("Create updater...")
        from TFUpdater import Updater
        updater = Updater(config=config, network=network, tf_session=session)
        updater.set_trainable_vars(network.get_trainable_params())
        updater.set_learning_rate(0.1)
        optim_op = updater.get_optim_op()
        assert isinstance(updater.optimizer, tf.train.AdamOptimizer)
        adam_weights_m_t = updater.optimizer.get_slot(var=weights_t, name="m")
        adam_weights_v_t = updater.optimizer.get_slot(var=weights_t, name="v")
        assert isinstance(adam_weights_m_t, tf.Variable)
        assert isinstance(adam_weights_v_t, tf.Variable)
        summaries_t = tf.summary.merge_all()

        # Effective Adam step direction: m / (sqrt(v) + eps), cf.
        # https://github.com/tensorflow/tensorflow/blob/03beb65cecbc1e49ea477bca7f54543134b31d53/tensorflow/core/kernels/training_ops_gpu.cu.cc
        adam_update_t = adam_weights_m_t / (tf.sqrt(adam_weights_v_t) + 1e-8)

        import tempfile
        tmp_tf_logdir = tempfile.mkdtemp("tmp-tf-log")
        print("Write TF logs to:", tmp_tf_logdir)
        writer = tf.summary.FileWriter(tmp_tf_logdir)
        writer.add_graph(session.graph)

        print("Training...")
        recent_info = []  # type: list[dict[str]]
        for i in range(10000):
            feed_dict = make_feed_dict(5)
            # Capture the raw gradient-op inputs/outputs for this step
            # before running the actual update, so they can be dumped on
            # failure.
            weights_grad, lstm_grad_ins, lstm_grad_outs = session.run(
                [weights_grad_t, lstm_grad_ins_t, lstm_grad_outs_t],
                feed_dict=feed_dict)
            try:
                if not numpy.all(numpy.isfinite(weights_grad)):
                    raise Exception("weights_grad has inf or nan.")
                loss, _opt, summaries, weights, adam_update = session.run(
                    [loss_t, optim_op, summaries_t, weights_t, adam_update_t],
                    feed_dict=feed_dict)
            except Exception as exc:
                # Dump diagnostics, mostly from the last successful step
                # recorded in recent_info, then re-raise below.
                print("Exception in step %i." % i)
                print(exc)
                print("Most recent summaries:")
                summary_proto = tf.Summary()
                summary_proto.ParseFromString(recent_info[-1]["summaries"])
                for val in summary_proto.value:
                    # Assuming all summaries are scalars.
                    print("  %s: %r" % (val.tag, val.simple_value))
                print("Most recent weights:")
                print(recent_info[-1]["weights"])
                print("Current weights:")
                print(session.run(weights_t))
                print("Most recent Adam update:")
                print(recent_info[-1]["adam_update"])
                print("Current Adam update:")
                print(session.run(adam_update_t))
                print("Used weights grad:")
                print(weights_grad)
                print("GradOfLstmGenericBase inputs:")
                for t, v in zip(lstm_grad_ins_t, lstm_grad_ins):
                    print("%r:" % t)
                    print(repr(v))
                print("GradOfLstmGenericBase outputs:")
                for t, v in zip(lstm_grad_outs_t, lstm_grad_outs):
                    print("%r:" % t)
                    print(repr(v))
                print("Demo grad:")
                print(session.run(demo_grad_t))
                print("Demo grad2:")
                print(
                    session.run(
                        demo_grad2_t,
                        feed_dict={
                            k: v
                            for (k, v) in zip(demo_grad2_input_placeholders,
                                              lstm_grad_ins)
                        }))
                # Round-trip the captured inputs through repr/eval (in
                # numpy's namespace) to see whether the values reproduce.
                print("Demo grad2 via eval:")
                print(
                    session.run(
                        demo_grad2_t,
                        feed_dict={
                            k: eval(repr(v), vars(numpy))
                            for (k, v) in zip(demo_grad2_input_placeholders,
                                              lstm_grad_ins)
                        }))
                print("Demo grad2 via args:")
                print(
                    session.run(
                        demo_grad2_t,
                        feed_dict={
                            k: v
                            for (k, v) in zip(demo_grad2_input_placeholders,
                                              _demo_lstm_grad_args())
                        }))
                raise Exception("Exception in step %i." % i)
            writer.add_summary(summaries, global_step=i)
            # Keep a bounded window of recent steps for the post-mortem
            # dump above.
            if len(recent_info) > 1000:
                recent_info.pop(0)
            recent_info.append({
                "step": i,
                "loss": loss,
                "summaries": summaries,
                "weights": weights,
                "adam_update": adam_update
            })
            if not numpy.isfinite(loss) or i % 100 == 0:
                print("step %i, loss: %r" % (i, loss))
            assert numpy.isfinite(loss)

    print("Done.")
    import shutil
    # Clean up the temporary TF log dir (only reached on success).
    shutil.rmtree(tmp_tf_logdir)