"""Build, tune and run a two-statement Antares op inside a tf.Session."""
import tensorflow as tf
from tensorflow.contrib import antares

# On TensorFlow 2.x, rebind `tf` to the v1 API and turn eager execution off.
if tf.version.VERSION.startswith('2.'):
    tf = tf.compat.v1
    tf.disable_eager_execution()

# Both operands are initialised to ones; only `input1` is marked trainable.
input0 = tf.get_variable(
    'input0',
    [1024, 512],
    tf.float32,
    initializer=tf.initializers.ones(tf.float32),
    trainable=False,
)
input1 = tf.get_variable(
    'input1',
    [512, 512],
    tf.float32,
    initializer=tf.initializers.ones(tf.float32),
    trainable=True,
)

# Adjacent literals concatenate into the single IR string passed to make_op.
ir_program = (
    'temp0[K, N] = input0[N, K] + 100; '
    'output0[N, M] +=! temp0[K, N] * input1[K, M] where K in 10'
)
operands = {'input0': input0, 'input1': input1}

op_builder = antares.make_op(ir=ir_program, feed_dict=operands)
op = op_builder.tune(step=100, use_cache=True, timeout=600).emit()

session_config = tf.ConfigProto()
session_config.gpu_options.allow_growth = True

with tf.Session(config=session_config) as sess:
    sess.run(tf.global_variables_initializer())
    result = sess.run(op)
    print('The result of tensor `%s` is:\n%s' % (op, result))
# NOTE(review): this excerpt begins inside a helper whose `def` line is not
# visible (presumably `create_param`, which the feed_dict below calls). The
# `return` is its last statement: a trainable float32 variable initialised from
# a truncated normal with stddev=0.001.
# The remaining statements create an all-ones, non-trainable input of shape
# [64, 3, 227, 227] and emit one Antares op whose IR chains
# conv_0 -> mpool_0 -> conv_1 -> mpool_1 -> conv_2 -> conv_3 -> mpool_2 ->
# reshape_0 -> dense_0 -> dense_1 -> dense_2, with `.call(`max`, [0.0])`
# applied inline to several stage inputs.
# NOTE(review): feed_dict supplies `const_4_`, but no IR statement reads it
# (mpool_2 pools conv_3 directly) -- confirm whether a conv_4 stage is missing.
# NOTE(review): the body of the trailing `with tf.Session(...)` block lies
# outside this excerpt.
return tf.get_variable(name, shape, tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.001), trainable=True) input_tensor = tf.get_variable('input_tensor', [64, 3, 227, 227], tf.float32, initializer=tf.initializers.ones(tf.float32), trainable=False) output_logits = antares.make_op(ir=f''' conv_0[N, F, HO, WO] +=! input_tensor[N, C, HO * 4 + KH, WO * 4 + KW] * const_0_[KH, KW, C, F] where HO in 55, WO in 55; mpool_0[N, C, HO, WO ] >=! conv_0[N, C, HO * 2 + KH, WO * 2 + KW].call(`max`, [0.0]) where HO in 27, WO in 27, KH in 3, KW in 3; conv_1[N, F, HO, WO] +=! mpool_0[N, C, -2 + HO + KH, -2 + WO + KW].when([-2 + HO + KH >= 0, -2 + HO + KH < 27, -2 + WO + KW >= 0, -2 + WO + KW < 27], 0.0) * const_1_[KH, KW, C, F] where HO in 27, WO in 27; mpool_1[N, C, HO, WO ] >=! conv_1[N, C, HO * 2 + KH, WO * 2 + KW].call(`max`, [0.0]) where HO in 13, WO in 13, KH in 3, KW in 3; conv_2[N, F, HO, WO] +=! mpool_1[N, C, -1 + HO + KH, -1 + WO + KW].when([-1 + HO + KH >= 0, -1 + HO + KH < 13, -1 + WO + KW >= 0, -1 + WO + KW < 13], 0.0) * const_2_[KH, KW, C, F] where HO in 13, WO in 13; conv_3[N, F, HO, WO] +=! conv_2[N, C, -1 + HO + KH, -1 + WO + KW].call(`max`, [0.0]).when([-1 + HO + KH >= 0, -1 + HO + KH < 13, -1 + WO + KW >= 0, -1 + WO + KW < 13], 0.0) * const_3_[KH, KW, C, F] where HO in 13, WO in 13; mpool_2[N, C, HO, WO] >=! conv_3[N, C, HO * 2 + KH, WO * 2 + KW].call(`max`, [0.0]) where HO in 6, WO in 6, KH in 3, KW in 3; reshape_0[N0, N1] = mpool_2[N0, N1 // 36 % 256, N1 // 6 % 6, N1 % 6] where N1 in 9216; dense_0[N, M] +=! reshape_0[N, K] * const_5_[K, M]; dense_1[N, M] +=! dense_0[N, K].call(`max`, [0.0]) * const_6_[K, M]; dense_2[N, M] +=! 
dense_1[N, K].call(`max`, [0.0]) * const_7_[K, M]; ''', feed_dict={ 'input_tensor': input_tensor, 'const_0_': create_param('const_0_', [11, 11, 3, 64]), 'const_1_': create_param('const_1_', [5, 5, 64, 192]), 'const_2_': create_param('const_2_', [3, 3, 192, 384]), 'const_3_': create_param('const_3_', [3, 3, 384, 256]), 'const_4_': create_param('const_4_', [3, 3, 256, 256]), 'const_5_': create_param('const_5_', [9216, 4096]), 'const_6_': create_param('const_6_', [4096, 4096]), 'const_7_': create_param('const_7_', [4096, 1000]), }).emit() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess:
"""Build, tune and run an Antares op that exposes three named outputs."""
import tensorflow as tf
from tensorflow.contrib import antares

# On TensorFlow 2.x, rebind `tf` to the v1 API and turn eager execution off.
if tf.version.VERSION.startswith('2.'):
    tf = tf.compat.v1
    tf.disable_eager_execution()

# Two flat, non-trainable float32 vectors of 1024 * 512 ones.
input0 = tf.get_variable(
    'input0',
    [1024 * 512],
    tf.float32,
    initializer=tf.initializers.ones(tf.float32),
    trainable=False,
)
input1 = tf.get_variable(
    'input1',
    [1024 * 512],
    tf.float32,
    initializer=tf.initializers.ones(tf.float32),
    trainable=False,
)

# Adjacent literals concatenate into the single IR string passed to make_op.
ir_program = (
    'output0[N] = input0[N] + input1[N]; '
    'output1[N] = input0[N].call(`exp`); '
    'output2[N] = input1[N] + output1[N];'
)
requested_outputs = ['output0', 'output1', 'output2']
operands = {'input0': input0, 'input1': input1}

op_builder = antares.make_op(
    ir=ir_program,
    extra_outputs=requested_outputs,
    feed_dict=operands,
)
op = op_builder.tune(step=100, use_cache=True, timeout=600).emit()

session_config = tf.ConfigProto()
session_config.gpu_options.allow_growth = True

with tf.Session(config=session_config) as sess:
    sess.run(tf.global_variables_initializer())
    result = sess.run(op)
    print('The result of tensor `%s` is:\n%s' % (op, result))
# NOTE(review): excerpt of a longer script. `x`, `w0`..`w2`, `b0`..`b2` and the
# first steps of `tf_out` are defined before the visible text.
# The leading statements finish a TensorFlow relu / matmul+add / relu /
# matmul+add stack in `tf_out`. The make_op call then builds an Antares op from
# an IR with three matmul-plus-bias stages, applying `.call(`max`, [0.0])`
# after the first two, tuned with step=200.
# NOTE(review): `out = x` is overwritten by the make_op result on the very next
# statement, so that assignment has no visible effect.
# NOTE(review): the `with` body presumably continues past the visible print.
tf_out = tf.nn.relu(tf_out) tf_out = tf.add(tf.matmul(tf_out, w1), b1) tf_out = tf.nn.relu(tf_out) tf_out = tf.add(tf.matmul(tf_out, w2), b2) out = x out = antares.make_op(ir=''' data_0[N, M] +=! data[N, K] * weight_0[K, M]; data_0_bias[N, K] = data_0[N, K] + bias_0[K]; data_1[N, K] = data_0_bias[N, K].call(`max`, [0.0]); data_2[N, M] +=! data_1[N, K] * weight_1[K, M]; data_2_bias[N, K] = data_2[N, K] + bias_1[K]; data_3[N, K] = data_2_bias[N, K].call(`max`, [0.0]); data_4[N, M] +=! data_3[N, K] * weight_2[K, M]; data_5[N, K] = (data_4[N, K] + bias_2[K]); ''', feed_dict={ 'data': x, 'weight_0': w0, 'weight_1': w1, 'weight_2': w2, 'bias_0': b0, 'bias_1': b1, 'bias_2': b2 }).tune(step=200, use_cache=True, timeout=600).emit() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) print('[Tensorflow Result]')
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import tensorflow as tf
from tensorflow.contrib import antares

from _common import *

# Hand TensorFlow's reduce_sum over axis 1 and the Antares expression that
# sums out M to compare_ops, both built on the same variable.
x = create_variable([1024, 3072], dtype=tf.float32)

tf_result = tf.reduce_sum(x, axis=1)
antares_result = antares.make_op('output0[N] +=! input0[N, M]', [x])

compare_ops(tf_result, antares_result)
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import tensorflow as tf
from tensorflow.contrib import antares

from _common import *

# Hand TensorFlow's one_hot (depth 128) and an Antares expression that emits
# 1.0 where input0[N] == F and 0.0 otherwise to compare_ops.
x = create_variable([1024], dtype=tf.int32)

tf_result = tf.one_hot(x, depth=128)
antares_result = antares.make_op(
    'output0[N, F] = const(1.0).when([input0[N] == F], 0.0) where F in 128',
    [x],
)

compare_ops(tf_result, antares_result)
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import os

import tensorflow as tf
from tensorflow.contrib import antares

# `tf.Session` is a TF1 API. On TensorFlow 2.x, rebind `tf` to the v1
# compatibility module and disable eager execution so the session-based code
# below keeps working. `tf.__version__` is used because it exists on every
# TensorFlow release.
if tf.__version__.startswith('2.'):
    tf = tf.compat.v1
    tf.disable_eager_execution()

# Two random-uniform operands of identical shape.
x = tf.random.uniform([1024, 512])
y = tf.random.uniform([1024, 512])

# The Antares server address comes from ANTARES_ADDR, defaulting to a local
# server on port 8880.
op = antares.make_op(
    'output0[N, M] = input0[N, M] * input1[N, M] + 1234',
    [x, y],
    server_addr=os.environ.get('ANTARES_ADDR', 'localhost:8880'),
)

with tf.Session() as sess:
    print(sess.run(op))
    # One extra run call whose fetch list repeats the op 100 times.
    sess.run([op] * 100)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import tensorflow as tf
from tensorflow.contrib import antares

# On TensorFlow 2.x, rebind `tf` to the v1 API and turn eager execution off.
if tf.version.VERSION.startswith('2.'):
    tf = tf.compat.v1
    tf.disable_eager_execution()

# Non-trainable float32 operands filled with ones.
x = tf.get_variable(
    'x',
    [128, 1024],
    tf.float32,
    initializer=tf.initializers.ones(tf.float32),
    trainable=False,
)
y = tf.get_variable(
    'y',
    [1024, 1024],
    tf.float32,
    initializer=tf.initializers.ones(tf.float32),
    trainable=False,
)

# IR placeholder names (`data`, `weight`) are bound to the variables here.
operands = {'data': x, 'weight': y}
op_builder = antares.make_op(
    ir='dot_0[N, M] +=! data[N, K] * weight[K, M]',
    feed_dict=operands,
)
op = op_builder.tune(step=100, use_cache=True, timeout=600).emit()

session_config = tf.ConfigProto()
session_config.gpu_options.allow_growth = True

with tf.Session(config=session_config) as sess:
    sess.run(tf.global_variables_initializer())
    result = sess.run(op)
    print('The result of tensor `%s` is:\n%s' % (op, result))
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import tensorflow as tf
from tensorflow.contrib import antares

from _common import *

# Hand a TensorFlow reshape + broadcast_to to [1024, 64, 16] and an Antares
# expression that adds a K axis of extent 16 to compare_ops.
x = create_variable([1024, 64], dtype=tf.float32)

with_unit_axis = tf.reshape(x, [1024, 64, 1])
tf_result = tf.broadcast_to(with_unit_axis, shape=[1024, 64, 16])
antares_result = antares.make_op(
    'output0[N, M, K] = input0[N, M] where K in 16',
    [x],
)

compare_ops(tf_result, antares_result)
# One fused Antares op covering a full transformer-encoder-style layer.
# `input_tensor`, `N`, `H`, `I` and `create_param` are defined earlier in the
# script; `{H}` and `{N * H}` are substituted into the IR text by the f-string.
#
# Stages, in IR order:
#   1. merged_layer_*      : three projections (R = 0, 1, 2) plus bias.
#   2. attention_scores    : R=0 times R=1 contracted over H1, divided by H.
#   3. softmax_1_* / probs : softmax of the scores along S2.
#   4. context / attention : probs times R=2, output projection plus bias.
#   5. layer_norm_1_*      : residual add with input_tensor, then normalise.
#   6. intermediate*       : feed-forward projection plus bias, tanh-GELU.
#   7. layer_output*       : output projection plus bias.
#   8. layer_norm_2_*      : residual add with stage 5 output, then normalise.
#
# Why some statements look the way they do:
#   * The softmax max/sum temporaries keep the S1 axis, so every S1 row of
#     attention_probs is normalised over S2 on its own. Reducing over S1 as
#     well would make a whole [S1, S2] slab sum to 1 instead of each row.
#   * With n = N * H, temp0 = sum(x) and temp1 = sum(x * x), the normalised
#     value (x - mean) / sqrt(var) equals
#     (x * n - temp0) * rsqrt(temp1 * n - temp0 * temp0). The variance term
#     must therefore pair temp1 with n and square temp0.
#   * tanh-approximated GELU is x * 0.5 * (1 + tanh(...)), so the activation
#     is multiplied by the pre-activation value.
# NOTE(review): scores are scaled by 1 / H here; scaled dot-product attention
# normally uses 1 / sqrt(H) -- confirm which is intended.
layer_output_norm = antares.make_op(ir=f'''
  merged_layer_local[R, B, S1, N1, H1] +=! input_tensor[B, S1, N, H] * qkv_weight[R, N, H, N1, H1];
  merged_layer_trans[R, B, N1, S1, H1] = merged_layer_local[R, B, S1, N1, H1] + qkv_bias[R, N1, H1];
  attention_scores[B, N1, S1, S2] +=! merged_layer_trans[0, B, N1, S1, H1] * merged_layer_trans[1, B, N1, S2, H1] / const({H}).cast(`float32`);
  softmax_1_temp0[B, N1, S1] >=! attention_scores[B, N1, S1, S2];
  softmax_1_temp1[B, N1, S1] +=! (attention_scores[B, N1, S1, S2] - softmax_1_temp0[B, N1, S1]).call(`exp`);
  attention_probs[B, N1, S1, S2] = (attention_scores[B, N1, S1, S2] - softmax_1_temp0[B, N1, S1]).call(`exp`) / softmax_1_temp1[B, N1, S1];
  context_layer_trans[B, S1, N1, H1] +=! attention_probs[B, N1, S1, S2] * merged_layer_trans[2, B, N1, S2, H1];
  attention_local[B, S1, N2, H2] +=! context_layer_trans[B, S1, N1, H1] * attention_weight[N1, H1, N2, H2];
  attention_output[B, S1, N2, H2] = attention_local[B, S1, N2, H2] + attention_bias[N2, H2];
  layer_norm_1_src[B, S1, N2, H2] = attention_output[B, S1, N2, H2] + input_tensor[B, S1, N2, H2];
  layer_norm_1_temp0[B, S1] += layer_norm_1_src[B, S1, N2, H2];
  layer_norm_1_temp1[B, S1] += layer_norm_1_src[B, S1, N2, H2] * layer_norm_1_src[B, S1, N2, H2];
  attention_output_norm[B, S1, N2, H2] = (layer_norm_1_src[B, S1, N2, H2] * {N * H} - layer_norm_1_temp0[B, S1]) * (layer_norm_1_temp1[B, S1] * {N * H} - layer_norm_1_temp0[B, S1] * layer_norm_1_temp0[B, S1]).call(`max`, [1e-8]).call(`rsqrt`);
  intermediate_local[B, S1, I] +=! attention_output_norm[B, S1, N2, H2] * intermediate_weight[N2, H2, I];
  intermediate[B, S1, I] = intermediate_local[B, S1, I] + intermediate_bias[I];
  intermediate_gelu[B, S1, I] = intermediate[B, S1, I] * 0.5 * (1.0 + (0.79788456 * (intermediate[B, S1, I] + 0.044715 * intermediate[B, S1, I] * intermediate[B, S1, I] * intermediate[B, S1, I])).call(`tanh`));
  layer_output_local[B, S1, N2, H2] +=! intermediate_gelu[B, S1, I] * output_weight[I, N2, H2];
  layer_output[B, S1, N2, H2] = layer_output_local[B, S1, N2, H2] + output_bias[N2, H2];
  layer_norm_2_src[B, S1, N2, H2] = layer_output[B, S1, N2, H2] + attention_output_norm[B, S1, N2, H2];
  layer_norm_2_temp0[B, S1] += layer_norm_2_src[B, S1, N2, H2];
  layer_norm_2_temp1[B, S1] += layer_norm_2_src[B, S1, N2, H2] * layer_norm_2_src[B, S1, N2, H2];
  layer_output_norm[B, S1, N2, H2] = (layer_norm_2_src[B, S1, N2, H2] * {N * H} - layer_norm_2_temp0[B, S1]) * (layer_norm_2_temp1[B, S1] * {N * H} - layer_norm_2_temp0[B, S1] * layer_norm_2_temp0[B, S1]).call(`max`, [1e-8]).call(`rsqrt`);
''', feed_dict={
    'input_tensor': input_tensor,
    'qkv_weight': create_param('qkv_weight', [3, N, H, N, H]),
    'qkv_bias': create_param('qkv_bias', [3, N, H]),
    'attention_weight': create_param('attention_weight', [N, H, N, H]),
    'attention_bias': create_param('attention_bias', [N, H]),
    'intermediate_weight': create_param('intermediate_weight', [N, H, I]),
    'intermediate_bias': create_param('intermediate_bias', [I]),
    'output_weight': create_param('output_weight', [I, N, H]),
    'output_bias': create_param('output_bias', [N, H]),
}).emit()
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import tensorflow as tf
from tensorflow.contrib import antares

from _common import *

# Hand TensorFlow's transpose with permutation [0, 3, 1, 2] and the Antares
# index-remapping expression to compare_ops.
x = create_variable([64, 224, 224, 3], dtype=tf.float32)

tf_result = tf.transpose(x, [0, 3, 1, 2])
antares_result = antares.make_op(
    'output0[N, C, H, W] = input0[N, H, W, C]',
    [x],
)

compare_ops(tf_result, antares_result)
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import tensorflow as tf
from tensorflow.contrib import antares

# On TensorFlow 2.x, rebind `tf` to the v1 API and turn eager execution off.
# This runs before the helper import below, exactly as in the original order.
if tf.version.VERSION.startswith('2.'):
    tf = tf.compat.v1
    tf.disable_eager_execution()

from _common import *

# Hand TensorFlow's element-wise add and the Antares expression for the same
# sum to compare_ops.
x = create_variable([1024, 64])
y = create_variable([1024, 64])

tf_result = tf.add(x, y)
antares_result = antares.make_op(
    'output0[N, M] = input0[N, M] + input1[N, M]',
    [x, y],
)

compare_ops(tf_result, antares_result)
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import tensorflow as tf
from tensorflow.contrib import antares

from _common import *

# Hand TensorFlow's matmul and the Antares expression that contracts over K
# to compare_ops.
x = create_variable([1024, 64])
y = create_variable([64, 4096])

tf_result = tf.matmul(x, y)
antares_result = antares.make_op(
    'output0[N, M] +=! input0[N, K] * input1[K, M]',
    [x, y],
)

compare_ops(tf_result, antares_result)
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import os

import tensorflow as tf
from tensorflow.contrib import antares

# `tf.Session` is a TF1 API. On TensorFlow 2.x, rebind `tf` to the v1
# compatibility module and disable eager execution so the session-based code
# below keeps working. `tf.__version__` is used because it exists on every
# TensorFlow release.
if tf.__version__.startswith('2.'):
    tf = tf.compat.v1
    tf.disable_eager_execution()

x = tf.random.uniform([1024, 512])

# The IR placeholder `data` is bound by name through the dict argument. The
# Antares server address comes from ANTARES_ADDR, defaulting to a local server
# on port 8880.
op = antares.make_op(
    'reduce_sum_0[N] +=! data[N, M]',
    {'data': x},
    server_addr=os.environ.get('ANTARES_ADDR', 'localhost:8880'),
)

with tf.Session() as sess:
    # NOTE(review): `_output_names` is a private attribute of the op object.
    print('The result of tensor `%s` is:\n%s' %
          (op._output_names[0], sess.run(op)))
    # One extra run call whose fetch list repeats the op 100 times.
    sess.run([op] * 100)
# NOTE(review): excerpt of a longer script. `mpool_2`, `input_tensor` and
# `feed_dict` are defined before the visible text.
# TensorFlow side: flatten `mpool_2` to [batch, -1], then three matmuls against
# const_5_/const_6_/const_7_ with tf.nn.relu between them, kept as
# `output_logits_tf`.
# Antares side: one make_op whose IR chains conv_0 -> mpool_0 -> conv_1 ->
# mpool_1 -> conv_2 -> conv_3 -> conv_4 -> mpool_2 -> reshape_0 -> dense_0 ->
# dense_1 -> dense_2. `max(., 0.0)` is applied either as separate `*_relu`
# statements or inline on the pooling inputs.
# NOTE(review): within the visible lines only the Antares result is run and
# printed; `output_logits_tf` is not evaluated here.
reshape_0 = tf.reshape(mpool_2, [input_tensor.shape[0], -1]) dense_0 = tf.matmul(reshape_0, feed_dict['const_5_']) dense_1 = tf.matmul(tf.nn.relu(dense_0), feed_dict['const_6_']) dense_2 = tf.matmul(tf.nn.relu(dense_1), feed_dict['const_7_']) output_logits_tf = dense_2 output_logits = antares.make_op(ir=f''' conv_0[N, F, HO, WO] +=! input_tensor[N, C, HO * 4 + KH, WO * 4 + KW] * const_0_[KH, KW, C, F] where HO in 55, WO in 55; mpool_0[N, C, HO, WO] >=! conv_0[N, C, HO * 2 + KH, WO * 2 + KW].call(`max`, [0.0]) where HO in 27, WO in 27, KH in 3, KW in 3; conv_1[N, F, HO, WO] +=! mpool_0[N, C, -2 + HO + KH, -2 + WO + KW].when([-2 + HO + KH >= 0, -2 + HO + KH < 27, -2 + WO + KW >= 0, -2 + WO + KW < 27], 0.0) * const_1_[KH, KW, C, F] where HO in 27, WO in 27; mpool_1[N, C, HO, WO] >=! conv_1[N, C, HO * 2 + KH, WO * 2 + KW].call(`max`, [0.0]) where HO in 13, WO in 13, KH in 3, KW in 3; conv_2[N, F, HO, WO] +=! mpool_1[N, C, -1 + HO + KH, -1 + WO + KW].when([-1 + HO + KH >= 0, -1 + HO + KH < 13, -1 + WO + KW >= 0, -1 + WO + KW < 13], 0.0) * const_2_[KH, KW, C, F] where HO in 13, WO in 13; conv_2_relu[N, F, HO, WO] = conv_2[N, F, HO, WO].call(`max`, [0.0]); conv_3[N, F, HO, WO] +=! conv_2_relu[N, C, -1 + HO + KH, -1 + WO + KW].when([-1 + HO + KH >= 0, -1 + HO + KH < 13, -1 + WO + KW >= 0, -1 + WO + KW < 13], 0.0) * const_3_[KH, KW, C, F] where HO in 13, WO in 13; conv_3_relu[N, F, HO, WO] = conv_3[N, F, HO, WO].call(`max`, [0.0]); conv_4[N, F, HO, WO] +=! conv_3_relu[N, C, -1 + HO + KH, -1 + WO + KW].when([-1 + HO + KH >= 0, -1 + HO + KH < 13, -1 + WO + KW >= 0, -1 + WO + KW < 13], 0.0) * const_4_[KH, KW, C, F] where HO in 13, WO in 13; mpool_2[N, C, HO, WO] >=! conv_4[N, C, HO * 2 + KH, WO * 2 + KW].call(`max`, [0.0]) where HO in 6, WO in 6, KH in 3, KW in 3; reshape_0[N0, N1] = mpool_2[N0, N1 // 36 % 256, N1 // 6 % 6, N1 % 6] where N1 in 9216; dense_0[N, M] +=! reshape_0[N, K] * const_5_[K, M]; dense_0_relu[N, M] = dense_0[N, M].call(`max`, [0.0]); dense_1[N, M] +=! 
dense_0_relu[N, K] * const_6_[K, M]; dense_1_relu[N, M] = dense_1[N, M].call(`max`, [0.0]); dense_2[N, M] +=! dense_1_relu[N, K] * const_7_[K, M]; ''', feed_dict=feed_dict).emit() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) print('Result from Antares = %s' % sess.run([output_logits]))