def verify_arange(start, stop, step):
    """Check relay.arange against numpy.arange for float32 constants.

    ``start`` and/or ``step`` may be None, in which case the matching
    argument is omitted from both the relay call and the numpy call.
    The relay expression is evaluated with the "graph" and "debug"
    executors on every (target, ctx) pair yielded by ``ctx_list()``.
    """
    dtype = "float32"

    def as_const(value):
        # Wrap a Python scalar as a relay constant of the test dtype.
        return relay.const(value, dtype=dtype)

    if start is None:
        if step is None:
            x = relay.arange(as_const(stop))
            expected = np.arange(stop)
        else:
            x = relay.arange(as_const(stop), step=as_const(step))
            expected = np.arange(stop, step=step)
    elif step is None:
        x = relay.arange(as_const(start), as_const(stop))
        expected = np.arange(start, stop)
    else:
        x = relay.arange(as_const(start), as_const(stop), as_const(step))
        expected = np.arange(start, stop, step)
    ref_res = expected.astype(dtype)

    func = relay.Function([], x)
    for target, ctx in ctx_list():
        for executor_kind in ["graph", "debug"]:
            executor = relay.create_executor(executor_kind, ctx=ctx, target=target)
            result = executor.evaluate(func)()
            tvm.testing.assert_allclose(result.asnumpy(), ref_res, rtol=1e-5)
def test_arange_full_and_reshape(self):
    """Convert an arange * full expression (with reshapes) to an xgraph.

    The left operand is arange(0, 10, 1) reshaped to [-1, 1] and then to
    [1, -1]; the right operand is a [10, 1] tensor filled with 1.0 and
    reshaped to [1, -1]. The test asserts on the number, types and shapes
    of the layers produced by ``xf_relay.from_relay``.
    """
    dtype = "float32"
    start = relay.expr.const(0.0)
    stop = relay.expr.const(10.0)
    step = relay.expr.const(1.0)
    fill_val = relay.expr.const(1.0)
    fill_shape = [10, 1]

    # Left branch: arange followed by two consecutive reshapes.
    left = relay.reshape(
        relay.reshape(relay.arange(start, stop, step, dtype), [-1, 1]),
        [1, -1],
    )
    # Right branch: constant-filled tensor flattened to a single row.
    right = relay.reshape(relay.full(fill_val, fill_shape, dtype), [1, -1])

    mod = tvm.IRModule.from_expr(relay.multiply(left, right))
    xgraph = xf_relay.from_relay(mod, {})
    layers = xgraph.get_layers()

    assert len(layers) == 10
    assert layers[0].type[0] == "Constant"
    assert layers[3].type[0] == "AnyOp"
    assert layers[7].type[0] == "AnyOp"
    assert layers[5].shapes == [1, 10]
    assert layers[8].shapes == [1, 10]
def test_arange(self):
    """Convert a constant relay.arange(1.0, 5.0, 1.5) function to an xgraph.

    The test expects four layers: three "Constant" layers of shape [1]
    followed by one "AnyOp" layer of shape [3].
    """
    arange_expr = relay.arange(
        relay.expr.const(1.0),
        relay.expr.const(5.0),
        relay.expr.const(1.5),
    )
    mod = tvm.IRModule.from_expr(relay.Function([], arange_expr))
    mod = relay.transform.InferType()(mod)

    layers = xf_relay.from_relay(mod, {}).get_layers()

    assert len(layers) == 4
    # The first three layers are the start / stop / step constants.
    for idx in (0, 1, 2):
        assert layers[idx].type[0] == "Constant"
        assert layers[idx].shapes == [1]
    assert layers[3].type[0] == "AnyOp"
    assert layers[3].shapes == [3]
def test_do_not_convert_arange():
    """Arange is a red listed operation and therefore should never be fp16.

    Runs ToMixedPrecision("float16") on a float32 arange module and asserts
    the result is structurally equal to the type-inferred original.
    """
    float_dtype = "float32"
    lower = relay.const(1, float_dtype)
    upper = relay.const(128, float_dtype)
    module = tvm.IRModule.from_expr(relay.arange(lower, upper))

    converted = ToMixedPrecision("float16")(module)
    expected = tvm.relay.transform.InferType()(module)

    assert tvm.ir.structural_equal(expected, converted)
def test_do_not_convert_arange():
    """Arange is a red listed operation and therefore should never be fp16.

    Type-infers a float32 arange module, passes it through
    ``verify_mixed_precision_output_close`` with zero tolerances, and
    asserts the returned module is structurally equal to the input.
    """
    float_dtype = "float32"
    lower = relay.const(1, float_dtype)
    upper = relay.const(128, float_dtype)

    typed_mod = tvm.IRModule.from_expr(relay.arange(lower, upper))
    typed_mod = tvm.relay.transform.InferType()(typed_mod)

    converted_mod = verify_mixed_precision_output_close(typed_mod, {}, atol=0.0, rtol=0)

    assert tvm.ir.structural_equal(typed_mod, converted_mod)
def test_arange_grad():
    """Run ``check_grad`` on relay.arange with scalar float64 variable inputs."""
    # TODO: testing arange numerically is strange because two-sided approx can
    # produce different output shapes
    dtype = "float64"
    scalar_type_names = ("start", "stop", "step")
    start, stop, step = (
        relay.var(name, relay.TensorType((), dtype)) for name in scalar_type_names
    )

    # Concrete scalar inputs for start, stop and step respectively.
    values = []
    for scalar in [2.5, 9.5, 1.8]:
        values.append(np.array(scalar, dtype=dtype))

    arange_expr = relay.arange(start, stop, step, dtype)
    fwd_func = relay.Function([start, stop, step], arange_expr)
    check_grad(fwd_func, inputs=values)
def verify_arange(start, stop, step):
    """Compare relay.arange with numpy.arange using float32 constants.

    A None ``start`` or ``step`` means the argument is left out of both the
    relay and the numpy call. The resulting function is evaluated with the
    "graph" and "debug" executors for each (target, ctx) pair returned by
    ``tvm.testing.enabled_targets()``.
    """
    dtype = "float32"
    has_start = start is not None
    has_step = step is not None

    if not has_start and not has_step:
        x = relay.arange(relay.const(stop, dtype=dtype))
        reference = np.arange(stop)
    elif not has_start:
        stop_const = relay.const(stop, dtype=dtype)
        step_const = relay.const(step, dtype=dtype)
        x = relay.arange(stop_const, step=step_const)
        reference = np.arange(stop, step=step)
    elif not has_step:
        start_const = relay.const(start, dtype=dtype)
        stop_const = relay.const(stop, dtype=dtype)
        x = relay.arange(start_const, stop_const)
        reference = np.arange(start, stop)
    else:
        start_const = relay.const(start, dtype=dtype)
        stop_const = relay.const(stop, dtype=dtype)
        step_const = relay.const(step, dtype=dtype)
        x = relay.arange(start_const, stop_const, step_const)
        reference = np.arange(start, stop, step)
    ref_res = reference.astype(dtype)

    func = relay.Function([], x)
    for target, ctx in tvm.testing.enabled_targets():
        for executor_kind in ["graph", "debug"]:
            executor = relay.create_executor(executor_kind, ctx=ctx, target=target)
            actual = executor.evaluate(func)()
            tvm.testing.assert_allclose(actual.asnumpy(), ref_res, rtol=1e-05)
def verify_arange(start, stop, step):
    """Compare relay.arange with numpy.arange for plain Python arguments.

    ``start`` and/or ``step`` may be None, in which case the matching
    argument is omitted from both the relay call and the numpy call.
    The relay expression is evaluated with the "graph" and "debug"
    executors on every (target, ctx) pair yielded by ``ctx_list()`` and the
    values are compared with a relative tolerance of 1e-5.
    """
    # Arguments go to relay.arange as raw Python values; no dtype is given
    # here, so both relay and numpy use their own defaults.
    if start is None and step is None:
        x = relay.arange(stop)
        ref_res = np.arange(stop)
    elif start is None:
        x = relay.arange(stop, step=step)
        ref_res = np.arange(stop, step=step)
    elif step is None:
        x = relay.arange(start, stop)
        ref_res = np.arange(start, stop)
    else:
        x = relay.arange(start, stop, step)
        ref_res = np.arange(start, stop, step)

    func = relay.Function([], x)
    for target, ctx in ctx_list():
        for kind in ["graph", "debug"]:
            intrp = relay.create_executor(kind, ctx=ctx, target=target)
            op_res = intrp.evaluate(func)()
            tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
def generate_relay_counter_array(self, counter):
    """Build the relay uint64 counter vector used by the Philox2x32 RNG.

    The result is a relay vector of ``self.n`` 64-bit integers, each
    encoding the couple (counter, i) for i in range(n): the counter is cast
    to uint64, broadcast to length ``self.n``, shifted left by
    ``RELAY_UINT64_32`` (defined elsewhere; presumably 32 bits) and OR-ed
    with the index ``i``.

    ``counter`` must be a relay expression (e.g. a relay constant or
    variable).
    """
    counter_u64 = relay.cast(counter, "uint64")
    # One copy of the counter per generated element.
    counter_vector = relay.op.transform.full(counter_u64, (self.n,), "uint64")
    high_part = relay.left_shift(counter_vector, RELAY_UINT64_32)
    # Element indices 0 .. n-1 fill the bits left free by the shift.
    index_part = relay.arange(relay.const(self.n, "uint64"), dtype="uint64")
    return relay.bitwise_or(high_part, index_part)
def flexible_dispatch(
    mod,
    buckets,
    axis=0,
    auto_pad=False,
    pad_value=0,
    input_indices=None,
    affects_output=True,
):
    """
    Enable inference of multiple shaped inputs in one module.

    This transformation adds a handler around a module that
    checks input shapes and dispatches to a subgraph specialized
    to handle the specific shapes of that input. If no exactly matching
    subgraph is available, the input will be run using full dynamism.
    For best performance, specify all the sizes the module will
    be likely to see using the buckets argument.

    By default, this function will dispatch shapes that exactly match one
    of the buckets to a corresponding subgraph. All non-matching shapes
    use the same fully dynamic fallback. This can be detrimental to
    performance for those non-matching shapes. Setting auto_pad to True
    causes this function to round-up the shape of non-matching inputs
    to the closest bucket. This allows them to use the tuned kernels of
    bucket shapes which can improve performance.

    Functions that have multiple inputs sharing a dynamic axis, which
    is common for batch size or sequence length dynamism, are supported
    through the input_indices argument.

    Many types of dynamism such as batching affect both the input and output
    shape, however this is not always the case. If the output shape
    is independent of the input, the affects_output argument of this
    function must be set to False.

    Parameters
    ----------
    mod: IRModule
        The module whose "main" function should be wrapped. Its "main"
        entry is replaced by the dispatch handler.
    buckets: list[int]
        The sizes of the input dimension that should be explicitly handled.
        Each value in buckets will have a corresponding subgraph constructed to
        handle it. When auto_pad is True each bucket is compared against the
        previous entry of this list, so the list is expected to be in
        ascending order.
    axis: int
        The dimension of the input that should be made flexible. This will
        most often be used for the batch dimension.
    auto_pad: Optional[bool]
        If True, then padding will be inserted to values that don't match one of
        the provided buckets.
    pad_value: Optional[float]
        When auto_pad is true, padding will be done with this value.
    input_indices: Optional[List[int]]
        Which inputs should be dispatched dynamically, provided by index. All inputs
        must share the same dynamic axis. Defaults to the first input only.
    affects_output: Optional[bool]
        Whether the change in input shape has a corresponding effect on the output shape.
        Batching for example affects both the input and output whereas changing sequence
        length in an NLP model typically does not.

    Returns
    -------
    mod : IRModule
        The new module wrapped with a flexible shape dispatch handler.
    """
    main_fn = mod["main"]

    # Default to single input if not specified.
    if input_indices is None:
        input_indices = [0]

    # Extract all input data and create a new dynamic variable for each.
    # `data` and `dyn_data` are indexed by position within `input_indices`,
    # so read each parameter directly instead of indexing `data` with the
    # parameter index (which fails for e.g. input_indices=[1]).
    data = []
    dyn_data = []
    for param_index in input_indices:
        param = main_fn.params[param_index]
        data.append(param)
        dyn_shape = override_shape(param.type_annotation, axis, relay.Any())
        dyn_data.append(relay.Var(param.name_hint, type_annotation=dyn_shape))

    # Extract the dynamic shape value from one of the inputs.
    rt_sh = relay.op.shape_of(dyn_data[0])
    flex_value = relay.op.take(rt_sh, relay.const(axis))

    if_exprs = []

    for i, bucket in enumerate(buckets):
        input_data = dyn_data
        check_dim = flex_value

        # Apply automatic padding if specified.
        if auto_pad:
            input_data = []
            # Construct padding expression for inputs.
            for j, inp in enumerate(dyn_data):
                pad_width = relay.const(bucket) - flex_value
                rank = len(data[j].type_annotation.shape)
                pads = relay.zeros([rank, 2], "int32")
                pads = relay.scatter_nd(pads, relay.const([axis, 1]), pad_width)
                padded_value = relay.nn.pad(inp, pads, pad_value)

                # Determine if this is the proper bucket to pad to. Do this by checking if the
                # input shape is between this bucket and the previous.
                if i == 0:
                    padded_value = relay.If(
                        relay.op.less_equal(flex_value, relay.const(bucket)),
                        padded_value,
                        inp,
                    )
                else:
                    padded_value = relay.If(
                        relay.op.logical_and(
                            relay.op.less_equal(flex_value, relay.const(bucket)),
                            relay.op.greater(flex_value, relay.const(buckets[i - 1])),
                        ),
                        padded_value,
                        inp,
                    )
                # Update input value and test dimension to reflect possible padding.
                input_data.append(padded_value)
            # Grab the new possibly padded shape for checking bucket size.
            check_dim = relay.op.take(relay.op.shape_of(input_data[0]), relay.const(axis))

        # Create a specialized subgraph for the current bucket.
        spec_call, spec_ty = specialize_body(
            mod,
            main_fn,
            axis,
            bucket,
            input_indices=input_indices,
            affects_output=affects_output,
        )

        # Apply hard casting to shape to create statically typed graphs.
        spec_data = [
            relay.op.reshape(inp, spec_ty[j].shape) for j, inp in enumerate(input_data)
        ]

        # Create a dispatch statement for the current specialized graph.
        call_args = list(main_fn.params)
        for j, param_index in enumerate(input_indices):
            call_args[param_index] = spec_data[j]
        new_call = spec_call(*call_args)

        # Remove meaningless padded outputs if applicable.
        if auto_pad and affects_output:
            new_call = relay.take(
                new_call,
                relay.arange(start=relay.const(0), stop=flex_value, dtype="int32"),
                axis=axis,
            )

        # Add this new case to the dispatch handler.
        if_exprs.append((relay.op.equal(check_dim, relay.const(bucket)), new_call))

    # Create a subgraph to handle all other shapes.
    default_dyn_call, _ = specialize_body(
        mod,
        main_fn,
        axis,
        relay.Any(),
        input_indices=input_indices,
        affects_output=affects_output,
    )
    call_args = list(main_fn.params)
    for j, param_index in enumerate(input_indices):
        call_args[param_index] = dyn_data[j]
    new_body = default_dyn_call(*call_args)

    # Create an If chain to dispatch shapes to the appropriate specialized subgraph.
    # Each new If wraps the previous body, so the last bucket is tested first.
    for cond, true_branch in if_exprs:
        new_body = relay.If(cond, true_branch, new_body)

    # Assign new parameters to the function.
    new_params = list(main_fn.params)
    for j, param_index in enumerate(input_indices):
        new_params[param_index] = dyn_data[j]

    # Update the output shape to be dynamic if needed.
    if affects_output:
        dyn_ret_type = override_shape(main_fn.ret_type, axis, relay.Any())
    else:
        dyn_ret_type = main_fn.ret_type

    # Assign the handler as the new entrypoint in the module.
    new_main = relay.Function(
        new_params, new_body, dyn_ret_type, main_fn.type_params, main_fn.attrs
    )
    mod["main"] = new_main

    # Do type inference to make sure everything worked.
    mod = relay.transform.InferType()(mod)
    return mod