def test_gradient():
    x = sym.Variable("x")
    y = sym.Variable("y")
    z1 = sym.elemwise_add(x, sym.sqrt(y))
    z2 = sym.log(x)
    gradient = graph_util.gradients([z1, z2], [x, y])
    assert len(gradient) == 2

    g1 = sym.Variable("g1")
    g2 = sym.Variable("g2")
    grad_ys = [g1, g2]
    gradient = graph_util.gradients(sym.Group([z1, z2]),
                                    sym.Group([x, y]),
                                    grad_ys=grad_ys)
    g_graph = graph.create(sym.Group(gradient)).ir()
    assert len(gradient) == 2
    assert "g1" in g_graph
    assert "g2" in g_graph
def from_caffe2(self, init_net, predict_net):
    """Construct nnvm nodes from caffe2 graph.

    Parameters
    ----------
    init_net : protobuf object
        Caffe2 NetDef containing the weight initializers.
    predict_net : protobuf object
        Caffe2 NetDef describing the computation graph.

    Returns
    -------
    sym : nnvm.sym.Symbol
        The returned nnvm symbol
    params : dict
        A dict of name: tvm.nd.array pairs, used as pretrained weights
    """
    from caffe2.python import workspace
    workspace.RunNetOnce(init_net)

    # Input
    input_name = predict_net.op[0].input[0]

    # Params: every blob consumed by some op (other than the graph
    # input) is treated as a pretrained weight.
    self._params = {}
    used_blobs = set()
    for c2_op in predict_net.op:
        for i in c2_op.input:
            used_blobs.add(i)
    for blob in workspace.Blobs():
        if blob in used_blobs and blob != input_name:
            self._params[blob] = tvm.nd.array(workspace.FetchBlob(blob))

    # Variables
    self._nodes = {}
    for blob in predict_net.external_input:
        self._nodes[blob] = _sym.Variable(name=blob)

    # Ops: record the producing op for each output blob, then convert.
    for c2_op in predict_net.op:
        for blob in c2_op.output:
            self._ops[blob] = c2_op
    for c2_op in predict_net.op:
        self._process_op(c2_op)

    # Outputs
    out = []
    for blob in predict_net.external_output:
        out.append(self._nodes[blob])

    if len(out) > 1:
        sym = _sym.Group(out)
    else:
        sym = out[0]

    return sym, self._params
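# A hedged usage sketch for the converter above. In NNVM this entry point
# is exposed as nnvm.frontend.from_caffe2; the file names below are
# hypothetical serialized Caffe2 NetDefs, not part of the snippet above.
def example_from_caffe2():
    from caffe2.proto import caffe2_pb2
    import nnvm

    init_net = caffe2_pb2.NetDef()
    predict_net = caffe2_pb2.NetDef()
    with open("init_net.pb", "rb") as f:      # hypothetical path
        init_net.ParseFromString(f.read())
    with open("predict_net.pb", "rb") as f:   # hypothetical path
        predict_net.ParseFromString(f.read())
    # Returns the nnvm symbol and the dict of pretrained weights.
    return nnvm.frontend.from_caffe2(init_net, predict_net)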
def gradients(ys, xs, grad_ys=None):
    """Return the symbolic gradients of ys with respect to xs.

    ys and xs may be single symbols, lists of symbols, or groups;
    grad_ys optionally supplies the head gradients (defaults to ones).
    """
    if isinstance(ys, list):
        ys = symbol.Group(ys)
    g = graph.create(ys)
    g._set_symbol_list_attr('grad_ys', ys)
    g._set_symbol_list_attr('grad_xs', xs)
    ny = len(ys.list_output_names())
    if grad_ys is None:
        grad_ys = [symbol.ones_like(ys[i]) for i in range(ny)]
    g._set_symbol_list_attr('grad_ys_out_grad', grad_ys)
    sym = g.apply('Gradient').symbol
    nx = len(xs) if isinstance(xs, list) else len(xs.list_output_names())
    ret = [sym[i] for i in range(nx)]
    return ret
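# A minimal usage sketch for gradients(), mirroring test_gradient above
# (assumes the same `sym` and `graph_util` imports as in that test):
def example_gradients():
    x = sym.Variable("x")
    y = sym.Variable("y")
    z = sym.elemwise_add(x, sym.sqrt(y))
    # One gradient symbol per entry of xs, in order.
    dz_dx, dz_dy = graph_util.gradients([z], [x, y])
    return dz_dx, dz_dy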
def nn(m: Model):
    v_images = sym.Variable("images", shape=(BATCH_SIZE, 1, 28, 28), dtype=0)
    v_true_labels = sym.Variable("true_labels", shape=(BATCH_SIZE, 10), dtype=0)

    x = v_images
    x = sym.reshape(data=x, shape=(BATCH_SIZE, 28 * 28))
    x = sym.dense(data=x, units=10)
    logits = x

    x = -sym.elemwise_mul(v_true_labels, sym.log_softmax(x))
    loss = sym.sum(x) / BATCH_SIZE

    # This is not really accuracy, because we use softmax instead of hardmax
    accuracy = sym.sum(v_true_labels * sym.softmax(logits)) / BATCH_SIZE

    # We have to list all weights explicitly (the corresponding variables
    # are generated automatically)
    weight_vars = [v for v in loss.list_input_variables()
                   if v.attr('name') not in ['images', 'true_labels']]

    optimizer = SGD(learning_rate=1e-4)
    update_step = optimizer.minimize(loss, var=weight_vars)

    tgraph = nnvm.graph.create(sym.Group([loss, update_step])) \
                       .apply("InferShape").apply("InferType")
    fgraph = nnvm.graph.create(sym.Group([loss, accuracy])) \
                       .apply("InferShape").apply("InferType")

    m.tgraph = tgraph
    m.fgraph = fgraph
    m.optimizer = optimizer
    m.loss = loss
    return m
def test_order_mutation_pass():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', dev='gpu')
    y = sym.add(y, x, name='add1')
    # write after read
    z = sym.assign(x, y, name='assign')
    # read after write
    t = sym.add(y, x, name='add2')
    g = graph.create(sym.Group([t, z]))
    jgraph = json.loads(g.apply(['OrderMutation', 'SaveJSON']).json_attr('json'))
    jnodes = jgraph['nodes']
    nindex = {n['name']: i for i, n in enumerate(jnodes)}

    assert nindex['assign'] in jnodes[nindex['add2']]['control_deps']
    assert nindex['conv'] in jnodes[nindex['assign']]['control_deps']
    assert nindex['add1'] in jnodes[nindex['assign']]['control_deps']
    assert jnodes[nindex['assign']]['inputs'][0][2] == 1
def run(self, fetch, feed_dict=None):
    if isinstance(fetch, list):
        fetch = symbol.Group(fetch)
    feed_dict = feed_dict if feed_dict else {}

    feed_placeholders = []
    feed_dptr = []
    feed_dtype = []
    feed_shape_csr_ptr = [0]
    feed_shape_data = []
    src_list = []

    for k, v in feed_dict.items():
        assert isinstance(k, symbol.Symbol)
        assert isinstance(v, np.ndarray)
        feed_placeholders.append(k.handle)
        # only convert to float32 for now
        source_array = np.ascontiguousarray(v, dtype=np.float32)
        # keep src_list alive for the duration of the call
        src_list.append(source_array)
        feed_dptr.append(source_array.ctypes.data_as(_ctypes.c_void_p))
        feed_dtype.append(0)
        feed_shape_data.extend(source_array.shape)
        feed_shape_csr_ptr.append(len(feed_shape_data))

    out_size = nn_uint()
    out_dptr = _ctypes.POINTER(_ctypes.POINTER(nn_float))()
    out_dtype = _ctypes.POINTER(nn_uint)()
    out_shape_ndim = _ctypes.POINTER(nn_uint)()
    out_shape_data = _ctypes.POINTER(_ctypes.POINTER(nn_uint))()

    check_call(_LIB.NNSessionRun(
        self.handle,
        fetch.handle,
        nn_uint(len(src_list)),
        c_array(_ctypes.c_void_p, feed_placeholders),
        c_array(_ctypes.c_void_p, feed_dptr),
        c_array(nn_uint, feed_dtype),
        c_array(nn_uint, feed_shape_csr_ptr),
        c_array(nn_uint, feed_shape_data),
        _ctypes.byref(out_size),
        _ctypes.byref(out_dptr),
        _ctypes.byref(out_dtype),
        _ctypes.byref(out_shape_ndim),
        _ctypes.byref(out_shape_data)))

    ret = []
    for i in range(out_size.value):
        shape = tuple(out_shape_data[i][:out_shape_ndim[i]])
        ret.append(_get_numpy(out_dptr[i], out_dtype[i], shape))
    return ret[0] if len(ret) == 1 else ret
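# A hedged usage sketch for run(): it assumes this method lives on a
# session-like object constructed elsewhere (the `sess` parameter stands
# in for it; its constructor is not shown in the snippet above).
def example_session_run(sess):
    x = symbol.Variable("x")
    # Feed keys are symbols, values are numpy arrays (converted to
    # float32 internally, as noted in run()).
    out = sess.run(x + 1,
                   feed_dict={x: np.ones((2, 2), dtype=np.float32)})
    return out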
def test_create_full_graph():
    x = sym.Variable("x")
    y = sym.Variable("y")
    z1 = sym.elemwise_add(x, sym.sqrt(y))
    z2 = sym.log(x)
    symbol = sym.Group([z1, z2])

    compute_graph = graph.create(symbol, need_backward=True)
    assert compute_graph.index.num_nodes == 11

    head_grads = [sym.Variable("g1"), sym.Variable("g2")]
    compute_graph = graph.create(symbol, need_backward=True,
                                 head_grads=head_grads)
    ir = compute_graph.ir()
    assert compute_graph.index.num_nodes == 11
    assert "g1" in ir
    assert "g2" in ir

    fixed_args = ["x"]
    compute_graph = graph.create(symbol, need_backward=True,
                                 fixed_args=fixed_args)
    assert compute_graph.index.num_nodes == 8
def convert(self, lst, context):
    """Converts the list of nodes to a runnable form.

    All the nodes in the list must represent linear flow
    (no calls, branches, ...).

    Returns:
        (fn, inputs, outputs):
        - fn: A callable function
        - inputs: the list of input nodes whose values should be
          provided to the function
        - outputs: the list of output nodes corresponding to the
          outputs of the function

    Notes:
        This implementation converts the nodes to NNVM and compiles it.
    """
    self.c = count()
    self.eqv = {}
    self.inputs = []
    self.input_names = []
    self.constants = {}
    self.constant_vars = {}
    self.shapes = {}
    self.types = {}
    self.context = context

    for n in lst:
        assert n.is_apply()
        assert n.inputs[0].is_constant(Primitive)
        fn = n.inputs[0].value
        conv = self.mapping.get(fn, None)
        if conv is not None:
            self.eqv[n] = conv(self, *n.inputs[1:])
        else:
            raise NotImplementedError(fn)

    outputs = get_outputs(lst, lst[0].graph.manager.uses,
                          set(self.eqv.keys()))
    inmap = dict((self.eqv[i], i) for i in self.inputs)

    # Check for empty functions
    if all(self.eqv[o] in inmap for o in outputs):
        return None, [inmap[self.eqv[o]] for o in outputs], outputs

    target = context.MASK2STR[context.device_type]
    if target == 'cpu':
        nnvm_target = 'llvm'
    elif target == 'gpu':
        nnvm_target = 'cuda -libs=cublas'
    else:
        # Guard against falling through with nnvm_target unbound.
        raise ValueError("Unsupported target: %s" % target)

    g = nnvm.graph.create(sym.Group(list(self.eqv[o] for o in outputs)))
    dg, lib, params = nnvm.compiler.build(
        g, target=nnvm_target, shape=self.shapes, dtype=self.types,
        params=self.constants)

    shape = dg.json_attr('shape')
    types = dg.json_attr('dtype')
    index = dg.index

    def spec(entry_id):
        return (shape[entry_id], graph_attr.TCODE_TO_DTYPE[types[entry_id]])

    output_specs = [spec(index.entry_id(x)) for x in index.output_entries]
    assert len(output_specs) == len(outputs)

    module = graph_runtime.create(dg, lib, self.context)
    for n, p in params.items():
        module.set_input(n, p)

    input_types = [self.types[i] for i in self.input_names]
    return (NNVMRunner(module, self.input_names, input_types,
                       output_specs, self.context),
            self.inputs, outputs)
def test_check_function():
    # test the testing function

    x = sym.Variable("x")
    y = sym.Variable("y")

    # different styles of returning gradients from the backward function
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: [head_grads, 2 * head_grads],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: (head_grads, 2 * head_grads),
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: {'x': head_grads,
                                             'y': 2 * head_grads},
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: {'y': 2 * head_grads},
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: [2 * head_grads],
                   grad_input_vars=[y],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: 2 * head_grads,
                   grad_input_vars=[y],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
    check_function(x + 2 * y, lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: 2 * head_grads,
                   grad_input_vars=[y],
                   shape={'x': (1, 2), y: (1, 2)}, dtype='float64')

    # test just numerical gradients
    # different styles of shape and dtype passing
    check_function(x + 2 * y, shape={'x': (1, 2), y: (1, 2)},
                   numerical_grads=True)
    check_function(x + 2 * y, shape={'x': (1, 2), y: (1, 2)},
                   dtype='float32', numerical_grads=True)
    check_function(x + 2 * y, shape={'x': (1, 2), y: (1, 2)},
                   dtype={x: 'float32', 'y': 'float32'},
                   numerical_grads=True)
    check_function(x + 2 * y, shape=(1, 2), dtype='float32',
                   numerical_grads=True)

    # specifying variable attributes on variable creation
    # (in this case type codes must be used)
    x = sym.Variable("x", dtype=0, shape=(1, 2))
    check_function(x + 2 * y, shape={y: (1, 2)}, dtype={'y': 'float32'},
                   numerical_grads=True)
    y = sym.Variable("y", dtype=0, shape=(1, 2))

    # shape overriding
    def _fwd1(x, y):
        assert x.shape == (1, 1)
        assert y.shape == (1, 2)
        return x + 2 * y
    check_function(x + 2 * y, _fwd1, shape={x: (1, 1)})

    # in_range
    def _fwd2(x, y):
        assert x.shape == (100,)
        assert (x <= 0.9).all()
        assert (x >= 0.8).all()
        return x + 2 * y
    check_function(x + 2 * y, _fwd2, shape=(100,), in_range=(0.8, 0.9),
                   numerical_grads=False)
    check_function(x + 2 * y, _fwd2, shape=(100,),
                   in_range={'x': (0.8, 0.9)}, numerical_grads=False)
    check_function(x + 2 * y,
                   backward=lambda x, y, head_grads: [1.0, 2.0],
                   in_range={'head_grads_0': (1.0, 1.0)})
    # explicit passing of values
    check_function(x + 2 * y,
                   backward=lambda x, y, head_grads: [1.0, 2.0],
                   values={'head_grads_0': np.full((1, 2), 1.0)})

    # check that the function reports errors
    def _check_function_must_fail(*args, **kwargs):
        error = AssertionError
        if 'error' in kwargs:
            error = kwargs['error']
            del kwargs['error']
        try:
            check_function(*args, quiet=True, **kwargs)
        except error:
            pass
        else:
            raise AssertionError("check_function didn't raise an exception")

    _check_function_must_fail(x + 2 * y, error=ValueError)
    _check_function_must_fail(x + 2 * y, lambda x, y: x + y)
    _check_function_must_fail(x + 2 * y,
                              backward=lambda x, y, head_grads: [1.0, 2.0])
    _check_function_must_fail(sym.block_grad(x + 2 * y),
                              numerical_grads=True)
    _check_function_must_fail(x * x, numerical_grads=True,
                              numerical_grads_params={'atol': 0.0,
                                                      'rtol': 0.0})
    _check_function_must_fail(sym.log(-x * x), numerical_grads=True,
                              error=ValueError)

    # different styles of returning results from the forward function
    check_function(x + 2 * y, lambda x, y: [x + 2 * y],
                   numerical_grads=False)
    _check_function_must_fail(x + 2 * y, lambda x, y: [x + 2 * y, x],
                              numerical_grads=False, error=ValueError)
    _check_function_must_fail(x + 2 * y, lambda x, y: [],
                              numerical_grads=False, error=ValueError)

    # multiple outputs
    z = sym.Group([2 * x + y, x + 2 * y])
    check_function(z, lambda x, y: [2 * x + y, x + 2 * y])
    check_function(z, lambda x, y: (2 * x + y, x + 2 * y))
    check_function(z, backward=lambda x, y, head_grads:
                   [2 * head_grads[0] + head_grads[1],
                    head_grads[0] + 2 * head_grads[1]])
    _check_function_must_fail(z, backward=lambda x, y, head_grads:
                              [2 * head_grads[0], 2 * head_grads[1]])
    check_function(z, backward=lambda x, y, head_grads:
                   [head_grads[1], 2 * head_grads[1]],
                   in_range={'head_grads_0': (0, 0)})
    check_function(z, numerical_grads=True)

    z = sym.Group([sym.block_grad(2 * x + y), x + 2 * y])
    check_function(z, lambda x, y: [2 * x + y, x + 2 * y],
                   numerical_grads=False)
    _check_function_must_fail(z, lambda x, y: [2 * x + y, x + 2 * y])
    _check_function_must_fail(z, numerical_grads=True)

    z = sym.Group([2 * x + y, sym.block_grad(x + 2 * y)])
    _check_function_must_fail(z, numerical_grads=True)

    z = sym.Group([2 * x + y, x + 2 * y, x, y, sym.sum(x)])
    check_function(z, lambda x, y: [2 * x + y, x + 2 * y, x, y, np.sum(x)])

    # passing additional parameters to forward and backward
    def _fwd3(x, p):
        assert p == 'v'
        return x + 1

    def _bwd3(x, p, head_grads):
        assert p == 'v'
        return head_grads
    check_function(x + 1, _fwd3, _bwd3, additional_params={'p': 'v'})

    # implicitly created variables and shape/dtype inference for inputs
    x = sym.Variable("x", shape=(2, 3), dtype=0)
    b = sym.Variable("b")
    y = sym.dense(data=x, bias=b, units=4)
    # Don't check gradients on cuda because it doesn't yet support
    # ewise after reduce
    check_function(y, exclude_targets={'cuda'}, numerical_grads=True)
    check_function(y, shape={'x': (3, 4)}, exclude_targets={'cuda'},
                   numerical_grads=True)
    check_function(y, dtype={'x': 'float64'}, exclude_targets={'cuda'},
                   numerical_grads=True)

    x = sym.Variable("x")
    b = sym.Variable("b")
    w = sym.Variable("w")
    y = sym.dense(data=x, bias=b, weight=w, units=4)

    def _fwd_dense(x, w, b):
        return np.dot(x, w.T) + b
    check_function(y, _fwd_dense, shape={'x': (1, 2)},
                   dtype={'x': 'float32'}, numerical_grads=False)
    check_function(y, _fwd_dense, shape={'x': (1, 2)},
                   dtype={'w': 'float64'}, numerical_grads=False)
    _check_function_must_fail(y, _fwd_dense, shape={'x': (1, 2)},
                              dtype={'w': 'float64', 'b': 'float32'},
                              numerical_grads=False,
                              error=nnvm._base.NNVMError)
    # fails because no shape
    _check_function_must_fail(y, _fwd_dense, numerical_grads=False,
                              error=ValueError)
    # ok because type is float32 by default
    check_function(y, _fwd_dense, shape={'x': (1, 2)},
                   numerical_grads=False)
def group(*inputs):
    """Create a no-op symbol that carries the inputs as control
    dependencies, so executing it forces all of them to run."""
    x = _symbol_internal._nop()
    x._add_control_deps(symbol.Group(inputs))
    return x
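# A hedged usage sketch for group(): the returned no-op produces no
# value of its own; it only forces the grouped side effects to execute.
# (symbol.assign is assumed available here, as in the mutation test above.)
def example_group():
    a = symbol.Variable("a")
    b = symbol.Variable("b")
    step = group(symbol.assign(a, a + 1), symbol.assign(b, b * 2))
    return step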
data1 = symbol.Variable(name="data1")
data2 = symbol.Variable(name="data2")
net1 = data1 + data2

data_shape = (2,)
shape_dict = {"data1": data_shape, "data2": data_shape}
params = {}
params["data1"] = data1 = np.random.uniform(-1, 1, size=data_shape).astype("float32")
params["data2"] = data2 = np.random.uniform(-1, 1, size=data_shape).astype("float32")

data3 = symbol.Variable(name="data3")
data4 = symbol.Variable(name="data4")
net2 = data3 + data4
shape_dict.update({"data3": data_shape, "data4": data_shape})
params["data3"] = data3 = np.random.uniform(-1, 1, size=data_shape).astype("float32")
params["data4"] = data4 = np.random.uniform(-1, 1, size=data_shape).astype("float32")

net = symbol.Group([net1, net2])
deploy_graph, lib, params = nnvm.compiler.build(
    net, target="llvm", shape=shape_dict, dtype="float32", params=params)

temp = path.curdir
path_lib = path.join(temp, "deploy.so")
lib.export_library(path_lib)
with open(path.join(temp, "deploy.json"), "w") as fo:
    fo.write(deploy_graph.json())
with open(path.join(temp, "deploy.params"), "wb") as fo:
    fo.write(nnvm.compiler.save_param_dict(params))

loaded_lib = tvm.module.load(path_lib)
loaded_json = open(path.join(temp, "deploy.json")).read()
loaded_json = graph.load_json(loaded_json)
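# A hedged continuation of the round trip above: recreate the runtime
# module from the loaded artifacts and execute it. This follows the
# standard TVM graph_runtime flow and assumes
# `from tvm.contrib import graph_runtime` is in scope.
loaded_params = open(path.join(temp, "deploy.params"), "rb").read()
module = graph_runtime.create(loaded_json, loaded_lib, tvm.cpu(0))
module.load_params(loaded_params)
module.run()
out = module.get_output(0, tvm.nd.empty(data_shape, "float32"))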