def test_reuse_dropout():
    """``Dropout.reuse_with`` rebuilds the layer around a new input while
    keeping the (input, mask) two-input structure and its layer state."""
    ones = tx.Constant(np.ones(shape=[2, 4]), dtype=tf.float32)
    activated = tx.Activation(ones)

    drop1 = tx.Dropout(activated, probability=0.5, locked=True)
    assert len(drop1.inputs) == 2
    assert drop1.inputs[0] is activated
    assert drop1.inputs[-1] is drop1.layer_state.mask

    # shared state overrides mask?
    _, mask = tx.dropout(activated, return_mask=True)
    drop2 = drop1.reuse_with(activated, mask)
    assert len(drop2.inputs) == 2
    assert drop2.inputs[0] is activated
    assert drop2.inputs[-1] is drop2.layer_state.mask

    # called independently the two layers draw different random masks
    assert not tx.tensor_equal(drop1(), drop2())

    # evaluated inside one graph run, both outputs agree
    graph = tx.Graph.build(inputs=None, outputs=[drop1, drop2])
    out1, out2 = graph()
    assert tx.tensor_equal(out1, out2)

    # an unlocked dropout reused with the raw constant still evaluates
    drop1 = tx.Dropout(activated, probability=0.5)
    drop2 = drop1.reuse_with(ones)
    graph.eval(drop1, drop2)
def test_add_optimizer():
    """``set_optimizer`` returns the same (hashable) optimizer instance the
    model stores, and a ``Param`` hyperparameter stays linked to it."""
    target = tx.Constant([[1.]])
    inputs = tx.Input(n_units=2, name="inputs")
    y = tx.Linear(inputs, n_units=1, name="y")
    xent = tx.Lambda(target, y, fn=tf.nn.softmax_cross_entropy_with_logits, name="xent")
    model = tx.Model(run_outputs=y, train_inputs=[inputs, target], train_loss=xent)

    lr = tx.Param(init_value=0.2, name="lr")
    opt_a = model.set_optimizer(tf.optimizers.SGD, lr=lr)
    opt_b: tf.optimizers.Optimizer = model.optimizer
    assert opt_a == opt_b

    # optimizer is hashable: equal keys collapse to a single dict entry
    lookup = {opt_a: 0, opt_b: 1}
    assert opt_a in lookup
    assert lookup[opt_a] == 1

    # updating the Param propagates to the optimizer's learning rate
    lr.value = 0.3
    assert np.float32(0.3) == opt_a.lr.numpy()
def test_linear():
    """Linear layers expose weights/bias, are deterministic between calls,
    and share state either via ``share_state_with`` or ``reuse_with``."""
    base = tx.Constant(tf.ones([2, 4]), dtype=tf.float64)
    doubled = base * 2

    dense = tx.Linear(base, n_units=8, dtype=tf.float64)
    weights = dense.weights
    bias = dense.bias
    assert weights.shape == [4, 8]
    assert bias.shape == [8]
    assert len(dense.trainable_variables) == 2

    # repeated calls with unchanged state produce the same tensor
    first = dense()
    second = dense()
    assert tx.tensor_equal(first, second)

    # explicit state sharing yields identical outputs
    shared = tx.Linear(dense.inputs[0], 8, share_state_with=dense, dtype=tf.float64)
    third = shared()
    assert tx.tensor_equal(first, third)

    # reuse_with shares variables; linear map doubles with doubled input
    dense = tx.Linear(base, 8, dtype=tf.float64)
    reused = dense.reuse_with(doubled)
    assert dense.weights is reused.weights
    assert dense.bias is reused.bias
    assert tx.tensor_equal(dense() * 2, reused())
def test_variable_checkpoint(tmp_path):
    """A ``tf.train.Checkpoint`` over a layer restores its variables and
    writes the expected variable names into the checkpoint file."""
    const_in = tx.Constant(tf.ones([2, 4]))
    l1 = tx.Linear(const_in, 3, add_bias=True, name="l1")
    # l2 only serves as a source of replacement weight values below
    l2 = tx.Linear(const_in, 3, add_bias=False, name="l1")

    checkpoint = tf.train.Checkpoint(l1=l1)
    manager = tf.train.CheckpointManager(checkpoint, tmp_path / 'ckpts', max_to_keep=1)
    manager.save(1)

    # clobber l1's weights, then restore them from the saved checkpoint
    l1.weights.assign(l2.weights.value())
    status = checkpoint.restore(manager.latest_checkpoint)
    status.assert_existing_objects_matched()

    stored = tf.train.list_variables(manager.latest_checkpoint)
    assert len(stored) == 4
    assert stored[0][0] == '_CHECKPOINTABLE_OBJECT_GRAPH'
    assert "l1/bias" in stored[1][0]
    assert "l1/weights" in stored[2][0]
    assert "save_counter" in stored[3][0]
def test_conv1d_keras():
    """Compare ``tx.Conv1D`` against ``tf.keras.layers.Conv1D``.

    Fixes two defects in the original:
    * it was named ``test_conv1d`` and shadowed by a later function of the
      same name, so pytest never collected it — renamed;
    * the variable-count assertion compared ``cnn.variables`` with itself;
      it now compares the keras layer against the tx layer.
    """
    n_features = 3
    embed_size = 128
    seq_size = 3
    batch_size = 2
    inputs = tx.Constant(np.random.random([batch_size, seq_size]),
                         n_units=seq_size, dtype=tf.int32)
    emb = tx.Lookup(inputs, seq_size=seq_size,
                    embedding_shape=[n_features, embed_size])
    seq = emb()

    n_units = 100
    filter_size = 4
    cnn = tf.keras.layers.Conv1D(filters=n_units,
                                 kernel_size=filter_size,
                                 padding='same')
    res = cnn(seq)

    cnn2 = tx.Conv1D(emb, n_units=100, filter_size=filter_size)
    res2 = cnn2(seq)
    # both implementations expose the same number of variables
    assert len(cnn.variables) == len(cnn2.variables)

    # share the tx layer's parameters with the keras layer: outputs match
    cnn.kernel = cnn2.filters
    cnn.bias = cnn2.bias
    res3 = cnn(seq)
    assert not tx.tensor_equal(res, res2)
    assert tx.tensor_equal(res2, res3)
def test_conv1d():
    """``tx.Conv1D`` with fixed filters matches ``tf.nn.conv1d`` and reports
    the expected filter/output shapes."""
    num_filters = 2
    input_dim = 4
    seq_size = 3
    batch_size = 2
    filter_size = 2

    filter_shape = [filter_size, input_dim, num_filters]
    x = tf.ones([batch_size, seq_size, input_dim])
    x_layer = tx.Constant(x, input_dim)
    filters = tf.ones(filter_shape)

    conv_layer = tx.Conv1D(x_layer, num_filters, filter_size, filters=filters)
    expected = tf.nn.conv1d(input=x,
                            filters=filters,
                            stride=1,
                            padding="SAME",
                            data_format="NWC")
    output = conv_layer()

    assert tx.tensor_equal(expected, output)
    assert tx.tensor_equal(tf.shape(conv_layer.filters), filter_shape)
    assert tx.tensor_equal(tf.shape(output), [batch_size, seq_size, num_filters])
def test_mul():
    """Layer arithmetic works with constant inputs and the resulting layer
    can be wrapped in a ``tf.function``."""
    # also tests graphs with constants
    two = tx.Constant(tf.constant(2), dtype=tf.float64)
    product = two * 2
    assert tx.tensor_equal(product(), two() * 2)
    compiled = tf.function(product.__call__)
    assert compiled() == product()
def test_module_reuse_order():
    """``Module.reuse_with`` maps replacement layers to the module's
    declared input order; unsupplied inputs keep their original layers."""
    x1 = tx.Input([[2.]], n_units=1, name="x1")
    x2 = tx.Input([[2.]], n_units=1, name="x2")
    x3 = tx.Input([[1.]], n_units=1, name="x3")

    inner_sum = tx.Add(x2, x3)
    out = tx.Add(x1, inner_sum)
    module = tx.Module(inputs=[x1, x2, x3], output=out)

    # replace only the first two inputs with constants of equal value
    x1_const = tx.Constant([[2.]], name="x1b")
    x2_const = tx.Constant([[2.]], name="x2b")
    reused = module.reuse_with(x1_const, x2_const)

    original_out = module()
    reused_out = reused()
    assert tx.tensor_equal(original_out, reused_out)
def test_constant_save(tmp_path):
    """A Constant layer round-trips through tf.saved_model save/load."""
    export_dir = str(tmp_path.joinpath("custom"))
    const = tx.Constant(tf.constant(42.))
    assert const() == 42.

    tf.saved_model.save(const, export_dir)
    restored = tf.saved_model.load(export_dir)
    assert restored() == 42.
def test_model_vars():
    """A Model's trainable variables are exactly its output layer's."""
    target = tx.Constant([[1.]])
    inputs = tx.Input(n_units=2, name="inputs", constant=False)
    y = tx.Linear(inputs, n_units=1, name="y")
    xent = tx.Lambda(target, y, fn=tf.nn.softmax_cross_entropy_with_logits, name="xent")
    model = tx.Model(run_outputs=y, train_inputs=[inputs, target], train_loss=xent)
    assert model.trainable_variables == y.trainable_variables
def test_graph_input_order():
    """``Graph.build`` with ``inputs=None`` discovers the dynamic inputs.

    The original test only printed the discovered input names and asserted
    nothing; it now asserts that both dynamic ``Input`` layers are present
    among ``graph.in_nodes``. Dead commented-out code was removed.
    """
    in1 = tx.Input(n_units=1, name="in1", dtype=tf.float32, constant=False)
    in2 = tx.Input(n_units=1, name="in2", dtype=tf.float32, constant=False)
    in12 = tx.Add(in1, in2)
    in3 = tx.Constant(tf.ones(shape=[1], dtype=tf.float32))
    in123 = tx.Add(in12, in3)

    graph = tx.Graph.build(inputs=None, outputs=in123)
    in_nodes = list(graph.in_nodes)
    # NOTE(review): whether the Constant in3 also appears in in_nodes is
    # not established here, so only membership of in1/in2 is asserted
    assert in1 in in_nodes
    assert in2 in in_nodes
def test_lookup_sequence_sparse():
    """Lookup over sparse inputs honours batch_size and batch_padding."""
    input_dim = 10
    embed_dim = 3
    seq_size = 2
    batch_size = 3

    sparse_2d = tf.SparseTensor([[0, 2], [1, 0], [2, 1]], [1, 1, 1], [3, input_dim])
    sparse_1d = tf.SparseTensor([[2], [0], [1]], [1, 1, 1], [input_dim])
    layer_2d = tx.Constant(sparse_2d, input_dim)
    layer_1d = tx.Constant(sparse_1d, input_dim)

    lookup = tx.Lookup(layer_2d, seq_size,
                       embedding_shape=[input_dim, embed_dim],
                       batch_size=batch_size,
                       batch_padding=False)
    lookup_padded = tx.Lookup(layer_2d, seq_size,
                              embedding_shape=[input_dim, embed_dim],
                              batch_size=batch_size,
                              batch_padding=True)
    lookup_1d = tx.Lookup(layer_1d, seq_size,
                          embedding_shape=[input_dim, embed_dim],
                          batch_size=batch_size,
                          batch_padding=True)

    unpadded = lookup()
    padded = lookup_padded()
    padded_1d = lookup_1d()

    # without padding only the rows implied by the data are produced
    assert np.shape(unpadded) == (2, seq_size, embed_dim)
    # with padding the full batch_size is always produced
    assert np.shape(padded) == (batch_size, seq_size, embed_dim)
    assert np.shape(padded_1d) == (batch_size, seq_size, embed_dim)
def test_multihead_attention():
    """MHAttention exposes 3 inputs/variables and its ``reuse_with``
    variants (regularized, causal) share the same variables."""
    # TODO check causality
    n_features = 3
    embed_size = 128
    seq_size = 3
    batch_size = 2
    n_heads = 8

    tokens = tx.Constant(np.random.random([batch_size, seq_size]),
                         n_units=seq_size, dtype=tf.int32)
    emb = tx.Lookup(tokens, seq_size=seq_size,
                    embedding_shape=[n_features, embed_size])

    attention = tx.MHAttention(query=emb, key=emb, value=emb,
                               n_units=embed_size,
                               n_heads=n_heads,
                               causality=False,
                               attention_dropout=0.1,
                               regularized=False)
    assert len(attention.inputs) == 3
    # 3 "kernels" + bias
    assert len(attention.variables) == 3

    attention_reg = attention.reuse_with(emb, emb, emb, regularized=True)
    attention_plain = attention.reuse_with(emb, emb, emb, regularized=False)
    attention_causal = attention.reuse_with(emb, emb, emb, causality=True)
    attention_causal()

    result = attention()
    result_reg = attention_reg()
    result_plain = attention_plain()

    assert tx.same_shape(result, result_reg)
    assert tx.tensor_equal(result, result_plain)

    # both non-regularized variants share the exact same variable objects
    refs_a = set(map(lambda v: v.ref(), attention.variables))
    refs_b = set(map(lambda v: v.ref(), attention_plain.variables))
    assert refs_a == refs_b
def test_attention():
    """Single-head MHAttention with identity q/k/v projections matches the
    keras ``Attention`` layer, eagerly and as a compiled graph function."""
    n_features = 3
    embed_size = 8
    seq_size = 3
    batch_size = 2

    tokens = tx.Constant(np.random.random([batch_size, seq_size]),
                         n_units=seq_size, dtype=tf.int32)
    emb = tx.Lookup(tokens, seq_size=seq_size,
                    embedding_shape=[n_features, embed_size])
    seq = emb()

    # keras attention doesn't have multiple heads
    keras_attention = Attention(use_scale=False)
    expected = keras_attention([seq, seq, seq])

    attention = tx.MHAttention(emb, emb, emb, n_units=embed_size, n_heads=1)
    assert len(attention.variables) == 3

    # make the q/k/v projections identity maps so both layers agree
    attention.wq = tx.Linear(emb, n_units=None,
                             weights=tf.linalg.eye(embed_size, embed_size),
                             add_bias=False)
    attention.wk = tx.Linear(emb, n_units=None,
                             weights=tf.linalg.eye(embed_size, embed_size),
                             add_bias=False)
    attention.wv = tx.Linear(emb, n_units=None,
                             weights=tf.linalg.eye(embed_size, embed_size),
                             add_bias=False)
    assert tx.tensor_equal(attention.wq(seq), seq)

    result = attention()
    graph = tx.Graph.build(inputs=emb, outputs=attention)
    fn = graph.as_function(ord_inputs=emb, ord_outputs=attention)
    result_fn = fn(seq)

    assert tx.tensor_equal(expected, result)
    assert tx.tensor_equal(expected, result_fn)
def test_to_sparse_gradient():
    """Gradients flow through a ToSparse -> ToDense round trip back to the
    Linear layer's weights and bias."""
    target = tf.constant([[1., 0.], [1., 0.]])
    x = tx.Constant(tf.ones([1, 4], dtype=tf.float32), n_units=4)
    h = tx.Linear(x, n_units=2)
    sparse = tx.ToSparse(h)
    y = tx.ToDense(sparse)

    @tf.function
    def mse(labels, prediction):
        return tf.reduce_mean(tf.pow(labels - prediction, 2))

    with tf.GradientTape() as tape:
        prediction = y()
        loss = mse(target, prediction)

    grads = tape.gradient(loss, h.trainable_variables)
    assert len(grads) == 2
    assert tx.same_shape(grads[0], h.weights)
    assert tx.same_shape(grads[1], h.bias)
def test_graph_as_function():
    """``graph.as_function_v2`` yields a callable with stateful inputs:
    omitted arguments reuse the last values that were fed.

    Fixes: ``np.float`` was removed in NumPy 1.24 — replaced with the
    builtin ``float``; the second Input was accidentally also named
    "in1" — renamed to "in2"; removed the unused local ``data``.
    """
    in1 = tx.Input(n_units=1, name="in1", dtype=tf.float32, constant=False)
    in2 = tx.Input(n_units=1, name="in2", dtype=tf.float32, constant=False)
    in3 = tx.Constant(tf.ones(shape=[1], dtype=tf.float32))
    in12 = tx.Add(in1, in2, in3)

    graph = tx.Graph.build(inputs=[in1, in2, in3], outputs=in12)
    fn = graph.as_function_v2(ord_inputs=[in1, in2, in3],
                              stateful_inputs=True,
                              compile=False,
                              fn_name="add")

    # TODO the function should convert its inputs to tensors so that
    #  plain lists are not passed around internally
    assert fn(np.array([[1.]], dtype=float), np.array([[1.]], dtype=float)) == [[3]]
    # no arguments: the previous input values are reused
    assert fn() == [[3]]
    assert fn([[1.]], [[2.]]) == [[4]]
    assert fn() == [[4]]
    # a partial call only overwrites the inputs that are passed
    assert fn([[2.]]) == [[5]]
def test_lookup_sparse_padding():
    """Sparse Lookup padding.

    Lookup pads with zero vectors when ``seq_size`` is greater than the
    maximum row index present in the input SparseTensor.
    """
    input_dim = 6
    embed_dim = 4
    seq_size = 3

    sparse = tf.SparseTensor(indices=[[0, 1], [0, 3], [1, 0]],
                             values=[1, 1, 1],
                             dense_shape=[2, input_dim])
    sparse_layer = tx.Constant(sparse, input_dim)

    lookup = tx.Lookup(sparse_layer,
                       seq_size=seq_size,
                       embedding_shape=[input_dim, embed_dim],
                       batch_size=None,
                       batch_padding=False)
    result = lookup()

    assert tf.sparse.to_dense(sparse_layer()).shape == (2, input_dim)
    # the last position of the first row is padding -> an all-zero vector
    assert tx.tensor_equal(result[0][-1], tf.zeros([embed_dim]))
def test_constant_shape():
    """A Constant layer reports the shape of the tensor it wraps."""
    data = tf.ones([3, 3])
    layer = tx.Constant(data)
    assert layer.shape == data.shape
def test_linear_function():
    """A Linear layer wrapped in ``tf.function`` matches eager execution."""
    ones = tx.Constant(tf.ones([2, 4]), dtype=tf.float64)
    dense = tx.Linear(ones, n_units=8, dtype=tf.float64)
    compiled = tf.function(dense.__call__)
    assert tx.tensor_equal(compiled(), dense())