def forward(self, inputs):
    if self._use_bias:
        return sym.dense(data=inputs, weight=self.weight, bias=self.bias,
                         units=self._units)
    else:
        return sym.dense(data=inputs, weight=self.weight, units=self._units)
def overfeat(num_classes=1000):
    data = sym.Variable("data")
    body = conv2d_block(data, "conv1", 96, kernel_size=(11, 11), strides=(4, 4), padding=(5, 5))
    body = sym.max_pool2d(data=body, pool_size=(2, 2), strides=(2, 2), name="pool1")
    body = conv2d_block(body, "conv2", 256, kernel_size=(5, 5), strides=(1, 1), padding=(2, 2))
    body = sym.max_pool2d(data=body, pool_size=(2, 2), strides=(2, 2), name="pool2")
    body = conv2d_block(body, "conv3", 512, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = conv2d_block(body, "conv4", 1024, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = conv2d_block(body, "conv5", 1024, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = sym.max_pool2d(data=body, pool_size=(2, 2), strides=(2, 2), name="pool3")
    flatten = sym.flatten(data=body, name="flatten")
    fc = sym.dense(data=flatten, units=3072, use_bias=False, name="fc1")
    fc = sym.dense(data=fc, units=4096, use_bias=False, name="fc2")
    fc = sym.dense(data=fc, units=num_classes, use_bias=False, name="fc3")
    return fc
def get_classifier(input_data, num_classes):
    """Get VGG classifier layers as fc layers."""
    flatten = sym.flatten(data=input_data, name="flatten")
    fc1 = sym.dense(data=flatten, units=32, name="fc1")
    relu1 = sym.relu(data=fc1, name="relu1")
    drop1 = sym.dropout(data=relu1, rate=0.5, name="drop1")
    fc2 = sym.dense(data=drop1, units=32, name="fc2")
    relu2 = sym.relu(data=fc2, name="relu2")
    drop2 = sym.dropout(data=relu2, rate=0.5, name="drop2")
    fc3 = sym.dense(data=drop2, units=num_classes, name="fc3")
    return fc3
def mlp(units):
    data = sym.Variable("data")
    deep = fc_layer(data, units[0], "fc_layer1")
    deep = fc_layer(deep, units[1], "fc_layer2")
    name = "output_layer"
    w = sym.Variable(name + "_fc_weight")
    b = sym.Variable(name + "_fc_bias")
    fc = sym.dense(data=deep, weight=w, bias=b, units=units[2], name=name + "_fc")
    gamma = sym.Variable(name + "_bn_gamma")
    beta = sym.Variable(name + "_bn_beta")
    moving_mean = sym.Variable(name + "_bn_moving_mean")
    moving_var = sym.Variable(name + "_bn_moving_var")
    bn = sym.batch_norm(data=fc, gamma=gamma, beta=beta, moving_mean=moving_mean,
                        moving_var=moving_var, name=name + '_bn')
    mlp = sym.softmax(data=bn, name=name + 'softmax')
    return mlp
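# A minimal sketch (not one of the collected snippets) of how a symbol such as the
# mlp above might be compiled into a runtime module. It reuses the
# nnvm.compiler.build / graph_runtime pattern that appears verbatim in the
# test_dense example further below; the unit sizes, input shape, and "llvm"
# target are illustrative assumptions.
def compile_mlp_example():
    import tvm
    import nnvm.compiler
    from tvm.contrib import graph_runtime

    net = mlp((64, 32, 10))          # hidden sizes and class count are assumptions
    shape = {"data": (1, 16)}        # a single 16-feature sample, also an assumption
    graph, lib, _ = nnvm.compiler.build(net, "llvm", shape)
    # Weights are left as runtime inputs here; they could be fed via set_input()
    # before calling run(), as the test_dense example below does.
    return graph_runtime.create(graph, lib, tvm.cpu(0))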
def nnvm_dot(c, a, b):
    """Implementation of dot."""
    na = c.ref(a)
    nb = c.ref(b)
    return sym.dense(na, sym.transpose(nb, axes=(1, 0)),
                     units=b.shape[1], use_bias=False)
def test_batch_norm():
    x = sym.Variable('x')
    y = sym.dense(x, units=30, name="fc")
    z = sym.batch_norm(x, name='bn')
    assert z.list_input_names('aux_state') == ['bn_moving_mean', 'bn_moving_var']
    assert z.list_input_names('read_only') == ['x', 'bn_gamma', 'bn_beta']
def test_json_pass_with_attr():
    x = sym.Variable('x')
    y = sym.dense(data=x, name='fc', units=30)
    g = graph.create(y)
    g._set_json_attr('version', '0.1.0')
    ret = g.apply('SaveJSON')
    json_str = ret.json_attr('json')
    ret._set_json_attr('json', json_str)
    g2 = ret.apply('LoadJSON')
    assert g2.json_attr('version') == '0.1.0'
def test_default_input():
    x = sym.Variable('x')
    y = sym.dense(data=x, units=30, name='fc', use_bias=False)
    assert y.list_input_names() == ['x', 'fc_weight']
    tname = [z.list_output_names()[0] for z in y.list_input_variables()]
    assert tname == y.list_input_names()
    try:
        z = sym.elemwise_add(x)
        assert False
    except NNVMError:
        pass
def get_symbol(data, num_classes=16, **kwargs):
    conv = Conv(data, 32, kernel=(3, 3), stride=(2, 2), name="conv")
    conv_1 = Conv(conv, 32, kernel=(3, 3), name="conv_1")
    conv_2 = Conv(conv_1, 64, kernel=(3, 3), pad=(1, 1), name="conv_2")
    pool = Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max",
                   pad=(0, 0), name="pool")
    conv_3 = Conv(pool, 80, kernel=(1, 1), name="conv_3")
    conv_4 = Conv(conv_3, 192, kernel=(3, 3), name="conv_4")
    pool1 = Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max",
                    pad=(0, 0), name="pool1")
    in3a = Inception7A(pool1, 64, 64, 96, 96, 48, 64, "avg", 32, "mixed")
    in3b = Inception7A(in3a, 64, 64, 96, 96, 48, 64, "avg", 64, "mixed_1")
    in3c = Inception7A(in3b, 64, 64, 96, 96, 48, 64, "avg", 64, "mixed_2")
    in3d = Inception7B(in3c, 384, 64, 96, 96, "max", "mixed_3")
    in4a = Inception7C(in3d, 192, 128, 128, 192, 128, 128, 128, 128, 192, "avg", 192, "mixed_4")
    in4b = Inception7C(in4a, 192, 160, 160, 192, 160, 160, 160, 160, 192, "avg", 192, "mixed_5")
    in4c = Inception7C(in4b, 192, 160, 160, 192, 160, 160, 160, 160, 192, "avg", 192, "mixed_6")
    in4d = Inception7C(in4c, 192, 192, 192, 192, 192, 192, 192, 192, 192, "avg", 192, "mixed_7")
    in4e = Inception7D(in4d, 192, 320, 192, 192, 192, 192, "max", "mixed_8")
    in5a = Inception7E(in4e, 320, 384, 384, 384, 448, 384, 384, 384, "avg", 192, "mixed_9")
    in5b = Inception7E(in5a, 320, 384, 384, 384, 448, 384, 384, 384, "max", 192, "mixed_10")
    pool = Pooling(data=in5b, kernel=(8, 8), stride=(1, 1), pool_type="avg",
                   pad=(0, 0), name="global_pool")
    flatten = sym.flatten(data=pool, name="flatten")
    fc1 = sym.dense(data=flatten, units=num_classes, name="fc1")
    softmax = sym.softmax(data=fc1, name="softmax")
    return softmax
def test_json_pass():
    x = sym.Variable('x')
    y = sym.dense(data=x, name='conv', units=30)
    g = graph.create(y)
    ret = g.apply('SaveJSON')
    ret._set_json_attr('json', ret.json_attr('json'))
    g2 = ret.apply('LoadJSON')
    assert g2.apply('SaveJSON').json_attr('json') == ret.json_attr('json')
    json = g.json()
    g2 = graph.load_json(json)
    assert json == g2.json()
def yolo(num_classes=1470):
    data = sym.Variable("data")
    body = conv2d_block(data, "conv1", 64, kernel_size=(7, 7), strides=(2, 2), padding=(3, 3))
    body = sym.max_pool2d(data=body, pool_size=(2, 2), strides=(2, 2), name="pool1")
    body = conv2d_block(body, "conv2", 192, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = sym.max_pool2d(data=body, pool_size=(2, 2), strides=(2, 2), name="pool2")
    body = conv2d_block(body, "conv3", 128, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0))
    body = conv2d_block(body, "conv4", 256, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = conv2d_block(body, "conv5", 256, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0))
    body = conv2d_block(body, "conv6", 512, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = sym.max_pool2d(data=body, pool_size=(2, 2), strides=(2, 2), name="pool3")
    body = conv2d_block(body, "conv7", 256, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0))
    body = conv2d_block(body, "conv8", 512, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = conv2d_block(body, "conv9", 256, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0))
    body = conv2d_block(body, "conv10", 512, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = conv2d_block(body, "conv11", 256, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0))
    body = conv2d_block(body, "conv12", 512, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = conv2d_block(body, "conv13", 256, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0))
    body = conv2d_block(body, "conv14", 512, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = conv2d_block(body, "conv15", 512, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0))
    body = conv2d_block(body, "conv16", 1024, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = sym.max_pool2d(data=body, pool_size=(2, 2), strides=(2, 2), name="pool4")
    body = conv2d_block(body, "conv17", 512, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0))
    body = conv2d_block(body, "conv18", 1024, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = conv2d_block(body, "conv19", 512, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0))
    body = conv2d_block(body, "conv20", 1024, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = conv2d_block(body, "conv21", 1024, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = conv2d_block(body, "conv22", 1024, kernel_size=(3, 3), strides=(2, 2), padding=(1, 1))
    body = conv2d_block(body, "conv23", 1024, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    body = conv2d_block(body, "conv24", 1024, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1))
    flatten = sym.flatten(data=body, name="flatten")
    fc = sym.dense(data=flatten, units=4096, use_bias=False, name="fc1")
    act = sym.relu(data=fc, name="relu1")
    fc = sym.dense(data=act, units=num_classes, use_bias=False, name="fc2")
    return fc
def test_dense():
    x = sym.Variable("data", shape=(10, 20))
    y = sym.dense(x, units=30, name="fc")
    g, ldict = correct_layout(y, "HW")
    assert ldict["data"][0] == "HW"
    assert ldict["fc"][0] == "HW"
    assert ldict["fc_bias"][0] == "__undef__"
    # second pass will insert layout transform
    _, ldict = correct_layout(g, "HW16w")
    assert ldict["data"][0] == "HW16w"
    assert ldict["data_HW"][0] == "HW"
    assert ldict["fc"][0] == "HW"
    assert ldict["fc_bias"][0] == "__undef__"
def test_dense():
    x = sym.Variable("x", shape=(10, 100))
    w = sym.Variable("dense_weight", shape=(3, 100))
    b = sym.Variable("dense_bias", shape=(3,))
    y = sym.dense(x, w, b, use_bias=True, units=3, name="dense")
    y = sym.flatten(y)

    def forward(x, dense_weight, dense_bias):
        return np.dot(x, dense_weight.T) + dense_bias

    dtype = "float32"
    inputs = [
        ('x', (10, 100), x),
        ('dense_weight', (3, 100), w),
        ('dense_bias', (3,), b)
    ]
    helper(y, inputs, dtype, forward)
def compile(self, **kwargs):
    if kwargs['op'] == 'dense':
        return sym.dense(data=kwargs['data'], weight=kwargs['weight'],
                         bias=kwargs['bias'], units=kwargs['units'])
    elif kwargs['op'] == 'relu':
        return sym.relu(data=kwargs['data'])
    elif kwargs['op'] == 'leaky_relu':
        return sym.leaky_relu(data=kwargs['data'], alpha=kwargs['alpha'])
    elif kwargs['op'] == 'sigmoid':
        return sym.sigmoid(data=kwargs['data'])
    else:
        raise RuntimeError('invalid operator')
def mobile_net(num_classes=1000, alpha=1.0, is_shallow=False):
    """Function to construct a MobileNet"""
    data = sym.Variable("data")
    body = conv_block(data, "conv_block_1", int(32 * alpha), strides=(2, 2))
    body = separable_conv_block(body, "separable_conv_block_1",
                                int(32 * alpha), int(64 * alpha))
    body = separable_conv_block(body, "separable_conv_block_2",
                                int(64 * alpha), int(128 * alpha), downsample=True)
    body = separable_conv_block(body, "separable_conv_block_3",
                                int(128 * alpha), int(128 * alpha))
    body = separable_conv_block(body, "separable_conv_block_4",
                                int(128 * alpha), int(256 * alpha), downsample=True)
    body = separable_conv_block(body, "separable_conv_block_5",
                                int(256 * alpha), int(256 * alpha))
    body = separable_conv_block(body, "separable_conv_block_6",
                                int(256 * alpha), int(512 * alpha), downsample=True)
    if is_shallow:
        body = separable_conv_block(body, "separable_conv_block_7",
                                    int(512 * alpha), int(1024 * alpha), downsample=True)
        body = separable_conv_block(body, "separable_conv_block_8",
                                    int(1024 * alpha), int(1024 * alpha))
    else:
        for i in range(7, 12):
            body = separable_conv_block(body, "separable_conv_block_%d" % i,
                                        int(512 * alpha), int(512 * alpha))
        body = separable_conv_block(body, "separable_conv_block_12",
                                    int(512 * alpha), int(1024 * alpha), downsample=True)
        body = separable_conv_block(body, "separable_conv_block_13",
                                    int(1024 * alpha), int(1024 * alpha))
    pool = sym.global_avg_pool2d(data=body, name="pool")
    flatten = sym.flatten(data=pool, name="flatten")
    fc = sym.dense(data=flatten, units=num_classes, use_bias=False, name="fc")
    softmax = sym.softmax(data=fc, name="softmax")
    return softmax
def test_dense():
    x = sym.Variable("x")
    y = sym.dense(x, units=3, name="dense")
    y = sym.flatten(y)

    def forward(x, dense_weight, dense_bias):
        return np.dot(x, dense_weight.T) + dense_bias

    dtype = "float32"
    inputs = {
        'x': ((10, 100), x),
        'dense_weight': ((3, 100),),
        'dense_bias': ((3,),)
    }
    helper(y, inputs, dtype, forward)
def fc_layer(data, units, name):
    w = sym.Variable(name + "_w")
    b = sym.Variable(name + "_b")
    fc = sym.dense(data=data, weight=w, bias=b, units=units, name=name + '_fc')
    relu = sym.relu(data=fc, name=name + '_relu')
    gamma = sym.Variable(name + "_gamma")
    beta = sym.Variable(name + "_beta")
    moving_mean = sym.Variable(name + "_moving_mean")
    moving_var = sym.Variable(name + "_moving_var")
    bn = sym.batch_norm(data=relu, gamma=gamma, beta=beta, moving_mean=moving_mean,
                        moving_var=moving_var, name=name + '_bn')
    return bn
def test_dense():
    x = sym.Variable("x", shape=(10, 100))
    w = sym.Variable("dense_weight", shape=(3, 100))
    b = sym.Variable("dense_bias", shape=(3,))
    y = sym.dense(x, w, b, use_bias=True, units=3, name="dense")
    y = sym.flatten(y)

    def forward(x, dense_weight, dense_bias):
        return np.dot(x, dense_weight.T) + dense_bias

    shape = {
        'x': (10, 100),
        'w': (3, 100),
        'b': (3,)
    }
    # Don't check gradients on cuda because it doesn't yet support ewise after reduce
    check_function(y, forward, shape=shape, exclude_targets={'cuda'}, numerical_grads=True)
    check_function(y, forward, shape=shape, only_targets={'cuda'}, numerical_grads=False)
def test_dense():
    x = sym.Variable("x")
    y = sym.dense(x, units=3, name="dense")
    y = sym.flatten(y)
    dtype = "float32"
    shape = {
        "x": (10, 100),
        "dense_weight": (3, 100),
        "dense_bias": (3,),
    }
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, shape)
        m = graph_runtime.create(graph, lib, ctx)
        x_np = np.random.uniform(size=shape["x"]).astype(dtype)
        w_np = np.random.uniform(size=shape["dense_weight"]).astype(dtype)
        b_np = np.random.uniform(size=shape["dense_bias"]).astype(dtype)
        res = tvm.nd.empty((10, 3))
        m.run(x=x_np, dense_weight=w_np, dense_bias=b_np)
        m.get_output(0, res)
        res_np = np.dot(x_np, w_np.T) + b_np
        np.testing.assert_allclose(res.asnumpy(), res_np, atol=1e-5, rtol=1e-5)
def nn(m: Model):
    v_images = sym.Variable("images", shape=(BATCH_SIZE, 1, 28, 28), dtype=0)
    v_true_labels = sym.Variable("true_labels", shape=(BATCH_SIZE, 10), dtype=0)

    x = v_images
    x = sym.reshape(data=x, shape=(BATCH_SIZE, 28 * 28))
    x = sym.dense(data=x, units=10)
    logits = x

    x = -sym.elemwise_mul(v_true_labels, sym.log_softmax(x))
    loss = sym.sum(x) / BATCH_SIZE

    # This is not true accuracy, because we use softmax instead of hardmax
    accuracy = sym.sum(v_true_labels * sym.softmax(logits)) / BATCH_SIZE

    # We have to list all weights explicitly (the corresponding variables are
    # generated automatically)
    weight_vars = [v for v in loss.list_input_variables()
                   if v.attr('name') not in ['images', 'true_labels']]

    optimizer = SGD(learning_rate=1e-4)
    update_step = optimizer.minimize(loss, var=weight_vars)

    tgraph = nnvm.graph.create(sym.Group([loss, update_step])) \
        .apply("InferShape").apply("InferType")
    fgraph = nnvm.graph.create(sym.Group([loss, accuracy])) \
        .apply("InferShape").apply("InferType")

    m.tgraph = tgraph
    m.fgraph = fgraph
    m.optimizer = optimizer
    m.loss = loss
    return m
def test_graph_json_attr():
    x = sym.Variable('x')
    y = sym.dense(data=x, name='fc', units=30)
    g = graph.create(y)
    g._set_json_attr('ilist', [1, 2, 3], 'list_int')
    assert g.json_attr('ilist') == [1, 2, 3]
def test_list_args():
    x = sym.Variable('x')
    z = sym.Variable('z')
    y = sym.dense(data=x, name='fc', units=30)
    y = sym.elemwise_add(y, z, name='add1')
def test_dense():
    x = sym.Variable('x')
    y = sym.dense(x, units=30, name="fc")
    assert y.list_input_names() == ["x", "fc_weight", "fc_bias"]
def test_cnn_gradients():
    # input data
    h = 128
    w = 128
    data_shape = (1000, 3, h, w)
    data = sym.Variable('data', shape=data_shape, dtype=0)

    # conv2d
    num_channels = 64
    kernel_size = 32
    conv_w_shape = (num_channels, 3, kernel_size, kernel_size)
    conv_b_shape = (num_channels,)
    conv_w = sym.Variable('conv_w', shape=conv_w_shape)
    conv_b = sym.Variable('conv_b', shape=conv_b_shape)
    conv1 = sym.conv2d(data=data, weight=conv_w, bias=conv_b,
                       channels=num_channels,
                       kernel_size=(kernel_size, kernel_size),
                       name='conv1')
    # relu1
    relu1 = sym.relu(data=conv1, name='relu1')
    # max pooling
    max_pooling1 = sym.max_pool2d(data=relu1, pool_size=(2, 2), name='max_pooling1')
    # flatten
    flatten1 = sym.flatten(data=max_pooling1)
    # shape after flatten
    flatten_out_shape = (h - kernel_size) * (w - kernel_size) * num_channels
    # dense1
    dense1_hidden_units = 100
    dense1 = sym.dense(data=flatten1, name='dense1', units=dense1_hidden_units)
    # relu2
    relu2 = sym.relu(data=dense1, name='relu2')
    # dense2
    dense2_hidden_units = 10
    dense2 = sym.dense(data=relu2, name='dense2', units=dense2_hidden_units)
    # softmax
    mlp = sym.softmax(data=dense2, name='softmax')
    # fake non-sparse label
    label = sym.full_like(mlp, fill_value=1)
    # cross entropy loss
    ce_loss = sym.sum(
        sym.elemwise_mul(sym.log_softmax(dense2), label),
        axis=1,
        keepdims=True,
        name="ce_loss")

    # input variables:
    # print grad_g.symbol.list_input_names()
    # >> ['data', 'conv_w', 'conv_b',
    #     'dense1_weight', 'dense1_bias',
    #     'dense2_weight', 'dense2_bias']

    # output gradient variables:
    # print grad_g.symbol.list_output_names()
    # >> ['conv1_grad_data', 'conv1_grad_weight', 'conv1_grad_bias',
    #     'dense1_grad_weight', 'dense1_grad_bias',
    #     'dense2_grad_weight', 'dense2_grad_bias']
    grad_g = graph_util.get_gradient_graph(ce_loss, ce_loss.list_input_variables())

    # infer shape
    in_shapes, out_shapes = graph_util.infer_shape(grad_g)

    # forward graph shape
    assert in_shapes == [list(data_shape), list(conv_w_shape), list(conv_b_shape),
                         [dense1_hidden_units, flatten_out_shape],
                         [dense1_hidden_units],
                         [dense2_hidden_units, dense1_hidden_units],
                         [dense2_hidden_units]]
    # input grads shape should be equal to input shape
    assert in_shapes == out_shapes

    # output grads w.r.t. input variables
    grads = graph_util.gradients(ce_loss, ce_loss.list_input_variables())
    # number of gradients should be equal to the number of grad inputs
    assert len(grads) == len(ce_loss.list_input_variables())

    # infer type
    in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g)
    assert out_dtypes == ['float32', 'float32', 'float32', 'float32',
                          'float32', 'float32', 'float32']
def LR(units):
    data = sym.Variable("data")
    w = sym.Variable("w")
    b = sym.Variable("b")
    fc = sym.dense(data=data, weight=w, bias=b, units=units, name='fc')
    return fc
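# A minimal sketch (not one of the collected snippets) of inspecting the parameter
# shapes of the LR symbol above with graph_util.infer_shape, mirroring how that
# helper is used in the test_cnn_gradients and workload-initialization snippets
# in this collection. The batch size and feature count are illustrative assumptions.
def lr_param_shapes_example():
    import nnvm.graph as graph
    from nnvm.compiler import graph_util

    net = LR(units=10)
    g = graph.create(net)
    # Providing only the data shape lets InferShape derive the rest,
    # e.g. 'w' -> [10, 20] and 'b' -> [10].
    in_shapes, _ = graph_util.infer_shape(g, data=(8, 20))
    return dict(zip(g.index.input_names, in_shapes))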
a = tvm.placeholder((1, 3, 32, 32), name="a")
b = tvm.placeholder((1, 10), name="b")
dense_weight = sym.Variable("dense_weight", init=np.empty((900, 10), dtype=dtype))

# define network
data = sym.Variable("data")
y1 = sym.conv2d(data=data, channels=1, kernel_size=(3, 3), padding=(0, 0),
                use_bias=False, out_layout='NCHW')
y2 = sym.flatten(y1)
# y3 = sym.dense(y2, units=10, use_bias=False)
y3 = sym.dense(y2, weight=dense_weight, use_bias=False)
y4 = sym.softmax(y3)
out = y4  # this stands in for part of the loss function

# create workload
net, params = create_workload(out, batch_size, image_shape, dtype)
# print(net.debug_str())

target = tvm.target.create('llvm')
# target = tvm.target.create('opencl')

with nnvm.compiler.build_config(opt_level=0):
    graph, lib, params = nnvm.compiler.build(
        net, target, shape={"data": data_shape}, params=params)
def test_dense():
    x = sym.Variable('x')
    x1 = sym.dense(x, units=3, name="dense")
    x2 = sym.flatten(x1)
    x3 = sym.softmax(x2)
    assert x3.list_input_names() == ['x', 'dense_weight', 'dense_bias']
def test_check_function():
    # test the testing function
    x = sym.Variable("x")
    y = sym.Variable("y")

    # different styles of returning gradients from the backward function
    check_function(x + 2*y, lambda x, y: x + 2*y,
                   lambda x, y, head_grads: [head_grads, 2*head_grads],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2*y, lambda x, y: x + 2*y,
                   lambda x, y, head_grads: (head_grads, 2*head_grads),
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2*y, lambda x, y: x + 2*y,
                   lambda x, y, head_grads: {'x': head_grads, 'y': 2*head_grads},
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2*y, lambda x, y: x + 2*y,
                   lambda x, y, head_grads: {'y': 2*head_grads},
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2*y, lambda x, y: x + 2*y,
                   lambda x, y, head_grads: [2*head_grads],
                   grad_input_vars=[y],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2*y, lambda x, y: x + 2*y,
                   lambda x, y, head_grads: 2*head_grads,
                   grad_input_vars=[y],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2*y, lambda x, y: x + 2*y,
                   lambda x, y, head_grads: 2*head_grads,
                   grad_input_vars=[y],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float64')

    # test just numerical gradients
    # different styles of shape and dtype passing
    check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)}, numerical_grads=True)
    check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)}, dtype='float32',
                   numerical_grads=True)
    check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)},
                   dtype={x: 'float32', 'y': 'float32'}, numerical_grads=True)
    check_function(x + 2*y, shape=(1, 2), dtype='float32', numerical_grads=True)

    # specifying variable attributes on variable creation
    # (in this case type codes must be used)
    x = sym.Variable("x", dtype=0, shape=(1, 2))
    check_function(x + 2*y, shape={y: (1, 2)}, dtype={'y': 'float32'}, numerical_grads=True)
    y = sym.Variable("y", dtype=0, shape=(1, 2))

    # shape overriding
    def _fwd1(x, y):
        assert x.shape == (1, 1)
        assert y.shape == (1, 2)
        return x + 2*y
    check_function(x + 2*y, _fwd1, shape={x: (1, 1)})

    # in_range
    def _fwd2(x, y):
        assert x.shape == (100,)
        assert (x <= 0.9).all()
        assert (x >= 0.8).all()
        return x + 2*y
    check_function(x + 2*y, _fwd2, shape=(100,), in_range=(0.8, 0.9), numerical_grads=False)
    check_function(x + 2*y, _fwd2, shape=(100,), in_range={'x': (0.8, 0.9)}, numerical_grads=False)
    check_function(x + 2*y, backward=lambda x, y, head_grads: [1.0, 2.0],
                   in_range={'head_grads_0': (1.0, 1.0)})
    # explicit passing of values
    check_function(x + 2*y, backward=lambda x, y, head_grads: [1.0, 2.0],
                   values={'head_grads_0': np.full((1, 2), 1.0)})

    # check that the function reports errors
    def _check_function_must_fail(*args, **kwargs):
        error = AssertionError
        if 'error' in kwargs:
            error = kwargs['error']
            del kwargs['error']
        try:
            check_function(*args, quiet=True, **kwargs)
        except error:
            pass
        else:
            raise AssertionError("check_function didn't raise an exception")

    _check_function_must_fail(x + 2*y, error=ValueError)
    _check_function_must_fail(x + 2*y, lambda x, y: x + y)
    _check_function_must_fail(x + 2*y, backward=lambda x, y, head_grads: [1.0, 2.0])
    _check_function_must_fail(sym.block_grad(x + 2*y), numerical_grads=True)
    _check_function_must_fail(x*x, numerical_grads=True,
                              numerical_grads_params={'atol': 0.0, 'rtol': 0.0})
    _check_function_must_fail(sym.log(-x*x), numerical_grads=True, error=ValueError)

    # different styles of returning results from the forward function
    check_function(x + 2*y, lambda x, y: [x + 2*y], numerical_grads=False)
    _check_function_must_fail(x + 2*y, lambda x, y: [x + 2*y, x],
                              numerical_grads=False, error=ValueError)
    _check_function_must_fail(x + 2*y, lambda x, y: [],
                              numerical_grads=False, error=ValueError)

    # multiple outputs
    z = sym.Group([2*x + y, x + 2*y])
    check_function(z, lambda x, y: [2*x + y, x + 2*y])
    check_function(z, lambda x, y: (2*x + y, x + 2*y))
    check_function(z, backward=lambda x, y, head_grads: [2*head_grads[0] + head_grads[1],
                                                         head_grads[0] + 2*head_grads[1]])
    _check_function_must_fail(z, backward=lambda x, y, head_grads: [2*head_grads[0],
                                                                    2*head_grads[1]])
    check_function(z, backward=lambda x, y, head_grads: [head_grads[1], 2*head_grads[1]],
                   in_range={'head_grads_0': (0, 0)})
    check_function(z, numerical_grads=True)

    z = sym.Group([sym.block_grad(2*x + y), x + 2*y])
    check_function(z, lambda x, y: [2*x + y, x + 2*y], numerical_grads=False)
    _check_function_must_fail(z, lambda x, y: [2*x + y, x + 2*y])
    _check_function_must_fail(z, numerical_grads=True)

    z = sym.Group([2*x + y, sym.block_grad(x + 2*y)])
    _check_function_must_fail(z, numerical_grads=True)

    z = sym.Group([2*x + y, x + 2*y, x, y, sym.sum(x)])
    check_function(z, lambda x, y: [2*x + y, x + 2*y, x, y, np.sum(x)])

    # passing additional parameters to forward and backward
    def _fwd3(x, p):
        assert p == 'v'
        return x + 1
    def _bwd3(x, p, head_grads):
        assert p == 'v'
        return head_grads
    check_function(x + 1, _fwd3, _bwd3, additional_params={'p': 'v'})

    # implicitly created variables and shape/dtype inference for inputs
    x = sym.Variable("x", shape=(2, 3), dtype=0)
    b = sym.Variable("b")
    y = sym.dense(data=x, bias=b, units=4)
    # Don't check gradients on cuda because it doesn't yet support ewise after reduce
    check_function(y, exclude_targets={'cuda'}, numerical_grads=True)
    check_function(y, shape={'x': (3, 4)}, exclude_targets={'cuda'}, numerical_grads=True)
    check_function(y, dtype={'x': 'float64'}, exclude_targets={'cuda'}, numerical_grads=True)

    x = sym.Variable("x")
    b = sym.Variable("b")
    w = sym.Variable("w")
    y = sym.dense(data=x, bias=b, weight=w, units=4)
    def _fwd_dense(x, w, b):
        return np.dot(x, w.T) + b
    check_function(y, _fwd_dense, shape={'x': (1, 2)}, dtype={'x': 'float32'},
                   numerical_grads=False)
    check_function(y, _fwd_dense, shape={'x': (1, 2)}, dtype={'w': 'float64'},
                   numerical_grads=False)
    _check_function_must_fail(y, _fwd_dense, shape={'x': (1, 2)},
                              dtype={'w': 'float64', 'b': 'float32'},
                              numerical_grads=False,
                              error=nnvm._base.NNVMError)
    # fails because no shape
    _check_function_must_fail(y, _fwd_dense, numerical_grads=False, error=ValueError)
    # ok because type is float32 by default
    check_function(y, _fwd_dense, shape={'x': (1, 2)}, numerical_grads=False)
def test_dense():
    x = sym.Variable("x", shape=(10, 20))
    y = sym.dense(x, units=30, name="fc")
    sdict = infer_shape(y)
    assert sdict["fc"][0] == [10, 30]
    assert sdict["fc_bias"][0] == [30]
dtype = "float32" # name "data" is preferred! data = sym.Variable("data") # if you want to create_workload, you may have to let the system automatically generate layer kernels # if you pass a self-defined kernel in, there will be error #conv_kernel = sym.Variable("conv_kernel") x = sym.conv2d(data=data, channels=1, kernel_size=(3, 3), padding=(0, 0), use_bias=False, out_layout='NCHW') x = sym.flatten(data=x) x = sym.dense(data=x, units=num_class, use_bias=False) ''' params = {} g = graph.create(x) input_shapes, _ = graph_util.infer_shape(g, data=data_shape) shape_dict = dict(zip(g.index.input_names, input_shapes)) np.random.seed(0) initializer = Xavier() for k, v in shape_dict.items(): if k == 'data': print(k) continue print(k, end='\t') print(v) init_value = np.zeros(v).astype(dtype) initializer(k, init_value)