def run_op_node(input_data, op_fun, *args):
    """Build a graph around `op_fun`, execute it and return the result.

    Every entry of `input_data` becomes one leading argument of `op_fun`:
    values for which `np.isscalar` is true are embedded as constant nodes,
    everything else is exposed as a graph parameter and fed with the actual
    data when the computation is run. `op_fun` therefore has to accept nodes
    as its leading arguments.

    :param input_data: The input data for performed computation.
    :param op_fun: The function handler for operation we want to carry out.
    :param args: The extra arguments passed to `op_fun` after the input nodes.
    :return: The result from computations.
    """
    runtime = get_runtime()

    graph_inputs = []      # every node handed to op_fun, in input order
    parameters = []        # the subset of graph_inputs that are parameters
    parameter_values = []  # the data fed to those parameters at run time

    for position, value in enumerate(input_data):
        if not np.isscalar(value):
            parameter = ng.parameter(value.shape, name=ascii_uppercase[position],
                                     dtype=value.dtype)
            graph_inputs.append(parameter)
            parameters.append(parameter)
            parameter_values.append(value)
            continue
        graph_inputs.append(ng.constant(value, _get_numpy_dtype(value)))

    result_node = op_fun(*(graph_inputs + list(args)))
    compiled = runtime.computation(result_node, *parameters)
    return compiled(*parameter_values)
def ConvTranspose(onnx_node, ng_inputs):  # type: (NodeWrapper, List[NgraphNode]) -> NgraphNode
    """Calculate convolution transpose.

    The operation is expressed through `ng.convolution_backprop_data`, which
    needs the shape of the produced data batch; that shape is derived here from
    the input shapes and the node attributes.

    :param onnx_node: The ONNX node wrapper providing strides, dilations, pads
                      and the `output_padding` attribute.
    :param ng_inputs: Two or three input nodes: data, weights and optional bias.
    :return: The node computing the transposed convolution, with the bias added
             when a non-scalar bias is supplied.
    :raises ValueError: If the number of inputs is not 2 or 3, or if the
                        `output_padding` attribute is missing.
    """
    if len(ng_inputs) == 3:
        data, weights, bias = ng_inputs
    elif len(ng_inputs) == 2:
        data, weights = ng_inputs
        # Scalar placeholder; it has an empty shape so it is never added below.
        bias = ng.constant(0, dtype=get_dtype(data.get_element_type()))
    else:
        raise ValueError('ConvTranspose node (%s): expected 2 or 3 inputs, got %d.'
                         % (onnx_node.name, len(ng_inputs)))

    strides = get_strides(onnx_node)
    dilation = get_dilations(onnx_node)
    padding_below, padding_above = get_pads(onnx_node)

    output_padding = onnx_node.get_attribute_value('output_padding')
    if output_padding is None:
        raise ValueError('ConvTranspose node (%s): output_padding attribute is required.'
                         % onnx_node.name)

    data_shape = list(data.shape)
    weights_shape = list(weights.shape)

    num_spatial_dims = len(data.shape) - 2
    # One entry per spatial dimension, so the loop below is valid for any rank.
    data_dilation_strides = [1] * num_spatial_dims

    data_batch_shape = [1] * (num_spatial_dims + 2)
    data_batch_shape[0] = data_shape[0]
    data_batch_shape[1] = weights_shape[1]

    for i in range(num_spatial_dims):
        # Calculating spatial dims of data output shape for ngraph conv backprop op
        #
        #   floor((pb + (s * (ds - 1) + 1) + op - (d * (ws - 1) + 1) + 1) / dds) + 1
        #
        # d   - dilation
        # ds  - data shape
        # dds - data dilation strides
        # op  - output padding
        # pb  - padding below
        # s   - strides
        # ws  - weights shape
        data_batch_shape[i + 2] = (
            (padding_below[i] + ((data_shape[i + 2] - 1) * strides[i] + 1) + output_padding[i])
            - ((weights_shape[i + 2] - 1) * dilation[i] + 1) + 1
        ) // data_dilation_strides[i] + 1

    transconv = ng.convolution_backprop_data(data_batch_shape, weights, data, strides, dilation,
                                             padding_below, padding_above,
                                             data_dilation_strides)
    if len(bias.shape) > 0:
        return transconv + ng.broadcast_to(bias, transconv.shape, 1)
    else:
        return transconv
def test_constant_get_data_floating_point(data_type):
    """Check that a floating point constant hands back the data it was built from."""
    np.random.seed(133391)
    low, high = -1.0e20, 1.0e20

    samples = np.random.randn(2, 3, 4).astype(data_type)
    expected = low + samples * high * data_type(2)

    constant_node = ng.constant(expected, dtype=data_type)
    assert np.allclose(expected, constant_node.get_data())
def test_constant_get_data_signed_integer(data_type):
    """Check that a signed integer constant hands back the data it was built from."""
    np.random.seed(133391)
    limits = np.iinfo(data_type)

    expected = np.random.randint(limits.min, limits.max, size=[2, 3, 4], dtype=data_type)

    constant_node = ng.constant(expected, dtype=data_type)
    assert np.allclose(expected, constant_node.get_data())
def test_scalar(transformer_factory):
    """Evaluate a scalar constant and check both its shape and its value."""
    expected = 5
    scalar_op = ng.constant(expected)

    # Build the computation and call it right away.
    result = executor(scalar_op)()

    assert result.shape == ()
    np.testing.assert_allclose(result, expected)
def test_cputensor_fusion():
    """Evaluate ``b * (a + 2)`` on constants and compare with the NumPy result."""
    row_axis = ng.make_axis(length=1)
    col_axis = ng.make_axis(length=3)

    a_values = np.array([[1, 2, 3]], dtype=np.float32)
    b_values = np.array([[3, 2, 1]], dtype=np.float32)
    expected = np.multiply(b_values, np.add(a_values, 2))

    a = ng.constant(a_values, [row_axis, col_axis])
    b = ng.constant(b_values, [row_axis, col_axis])
    offset = ng.constant(2)
    fused = ng.multiply(b, ng.add(a, offset))

    with executor(fused) as ex:
        result = ex()
    print(result)
    assert np.array_equal(result, expected)
def test_constant_get_data_unsigned_integer(data_type):
    """Check that an unsigned integer constant hands back the data it was built from.

    The input is drawn with `np.random.randint` over the range of `data_type`,
    the same way the signed integer test does it. Casting normally distributed
    floats to an unsigned type truncates most samples to zero and is
    platform-dependent for negative samples, so it is avoided here.
    """
    np.random.seed(133391)
    limits = np.iinfo(data_type)
    input_data = np.random.randint(limits.min, limits.max, size=[2, 3, 4], dtype=data_type)
    node = ng.constant(input_data, dtype=data_type)
    retrieved_data = node.get_data()
    assert np.allclose(input_data, retrieved_data)
def test_elementwise_fp16_in(transformer_factory):
    """Multiply two float16 constants elementwise and check the product."""
    Y = ng.make_axis(name='Y')
    N = ng.make_axis(name='N')
    # Lengths are assigned after the axes are created.
    Y.length = 2
    N.length = 2

    fp16 = np.dtype(np.float16)
    lhs_values = np.array([[1.0, 2.0], [4.0, 12.0]], dtype='float16')
    rhs_values = np.array([[1.0, 2.0], [6.0, 12.0]], dtype='float16')

    lhs = ng.constant(lhs_values, [Y, N], dtype=fp16)
    rhs = ng.constant(rhs_values, [Y, N], dtype=fp16)
    product = ng.multiply(lhs, rhs)

    result = executor(product)()
    np.testing.assert_allclose(result, [[1.0, 4.0], [24.0, 144.0]])
def test_node_factory_topk():
    """Create a TopK node through the opset1 node factory and inspect it."""
    element_type = np.int32
    inputs = [
        ng.parameter([2, 10], dtype=element_type, name="A"),
        ng.constant(3, dtype=element_type, name="B"),
    ]
    attributes = {"axis": 1, "mode": "max", "sort": "value"}

    topk = _NodeFactory("opset1").create("TopK", inputs, attributes)

    assert topk.get_type_name() == "TopK"
    assert topk.get_output_size() == 2
    assert list(topk.get_output_shape(0)) == [2, 3]
def create_diff_if_with_two_outputs(condition_val):
    """Build an `If` node whose two branches consume different inputs.

    The then-branch uses X and Y, the else-branch uses X and Z; both branches
    produce two results.

    :param condition_val: The value of the boolean condition constant.
    :return: The created `If` node.
    """
    # The builtin `bool` is what the removed `np.bool` alias used to refer to.
    condition = ng.constant(condition_val, dtype=bool)

    # then_body
    X_t = ng.parameter([2], np.float32, "X")
    Y_t = ng.parameter([2], np.float32, "Y")
    mmul_t = ng.matmul(X_t, Y_t, False, False)
    mul_t = ng.multiply(Y_t, X_t)
    then_body_res_1 = ng.result(mmul_t)
    then_body_res_2 = ng.result(mul_t)
    then_body = GraphBody([X_t, Y_t], [then_body_res_1, then_body_res_2])
    then_body_inputs = [
        TensorIteratorInvariantInputDesc(1, 0),
        TensorIteratorInvariantInputDesc(2, 1)
    ]
    then_body_outputs = [
        TensorIteratorBodyOutputDesc(0, 0),
        TensorIteratorBodyOutputDesc(1, 1)
    ]

    # else_body
    X_e = ng.parameter([2], np.float32, "X")
    Z_e = ng.parameter([], np.float32, "Z")
    mul_e = ng.multiply(X_e, Z_e)
    else_body_res_1 = ng.result(Z_e)
    else_body_res_2 = ng.result(mul_e)
    else_body = GraphBody([X_e, Z_e], [else_body_res_1, else_body_res_2])
    else_body_inputs = [
        TensorIteratorInvariantInputDesc(1, 0),
        TensorIteratorInvariantInputDesc(3, 1)
    ]
    else_body_outputs = [
        TensorIteratorBodyOutputDesc(0, 0),
        TensorIteratorBodyOutputDesc(1, 1)
    ]

    X = ng.constant([3, 4], dtype=np.float32)
    Y = ng.constant([2, 1], dtype=np.float32)
    Z = ng.constant(4.0, dtype=np.float32)
    if_node = ng.if_op(condition, [X, Y, Z], (then_body, else_body),
                       (then_body_inputs, else_body_inputs),
                       (then_body_outputs, else_body_outputs))
    return if_node
def test_lrn():
    """Run LRN with explicit and with default parameters and check the outputs."""
    input_image_shape = (2, 3, 2, 1)
    element_count = int(np.prod(input_image_shape))
    input_image = np.arange(element_count).reshape(input_image_shape).astype("f")
    axes = np.array([1], dtype=np.int64)
    runtime = get_runtime()

    # Explicit parameter values
    expected_explicit = np.array(
        [
            [[[0.0], [0.05325444]], [[0.03402646], [0.01869806]], [[0.06805293], [0.03287071]]],
            [[[0.00509002], [0.00356153]], [[0.00174719], [0.0012555]],
             [[0.00322708], [0.00235574]]],
        ],
        dtype=np.float32,
    )
    explicit_model = ng.lrn(ng.constant(input_image), ng.constant(axes),
                            alpha=1.0, beta=2.0, bias=1.0, size=3)
    explicit_result = runtime.computation(explicit_model)()
    assert np.allclose(explicit_result, expected_explicit)

    # Test LRN default parameter values
    expected_default = np.array(
        [
            [[[0.0], [0.35355338]], [[0.8944272], [1.0606602]], [[1.7888544], [1.767767]]],
            [[[0.93704253], [0.97827977]], [[1.2493901], [1.2577883]],
             [[1.5617375], [1.5372968]]],
        ],
        dtype=np.float32,
    )
    default_model = ng.lrn(ng.constant(input_image), ng.constant(axes))
    default_result = runtime.computation(default_model)()
    assert np.allclose(default_result, expected_default)
def test_argmin():
    """Compute argmin along axis 0 of a constant matrix."""
    runtime = get_runtime()
    values = np.array([[12, 2, 10], [9, 8, 4], [6, 1, 5], [3, 11, 7]], dtype=np.float32)
    expected = np.array([3, 2, 1], dtype=np.int32)

    argmin_node = ng.argmin(ng.constant(values), 0)
    computation = runtime.computation(argmin_node)

    assert np.allclose(computation(), expected)
def test_convert_like():
    """Check type name, output count, shape and element type of ConvertLike."""
    data_shape = [1, 2, 3, 4]
    source = ng.parameter(data_shape, name="data", dtype=np.float32)
    reference = ng.constant(1, dtype=np.int8)

    converted = ng.convert_like(source, reference)

    assert converted.get_type_name() == "ConvertLike"
    assert converted.get_output_size() == 1
    assert list(converted.get_output_shape(0)) == [1, 2, 3, 4]
    assert converted.get_output_element_type(0) == Type.i8
def test_scalar():
    """Evaluate a scalar constant inside an executor context."""
    expected = 5
    scalar_op = ng.constant(expected)

    with executor(scalar_op) as ex:
        result = ex()

    assert result.shape == ()
    ng.testing.assert_allclose(result, expected)
def get_simple_graph():
    """Return a named constant with metadata and a ``log(exp(...))`` graph on top of it."""
    channel_axis = ng.make_axis(name='C', length=1)
    axes = ng.make_axes([channel_axis])

    base_op = ng.constant(5.0, axes).named("weird_name#@$")
    base_op.metadata["string"] = "stringval"

    exponent = ng.exp(base_op)
    simple_graph = ng.log(exponent)
    simple_graph.metadata.update(
        string_val="foo",
        bool_val=True,
        float_val=6.5,
        int_val=2,
    )
    return base_op, simple_graph
def Reshape(self, tf_node, inputs):
    """
    Reshapes a tensor.

    A constant tensor is reshaped with NumPy to any shape of equal size. A
    non-constant tensor can only be flattened to 1d or 2d.

    Arguments:
        tf_node: NodeDef object, the tensorflow node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the tensorflow node.

    Raises:
        ValueError: If input and output shapes differ in total size, or if
            the requested 2d shape is not a flatten of the input axes.
        NotImplementedError: If a non-constant tensor is reshaped to
            anything other than 1d or 2d.

    Inputs to tf_node:
        tensor, shape, name
    """
    # TODO: currently only support constants and flatten to 1d and 2d
    # get inputs
    tensor, shape = inputs

    # get input and output shape
    shape_i = tensor.shape.lengths
    shape_o = tuple(shape.const.astype(int))
    if np.prod(shape_i) != np.prod(shape_o):
        raise ValueError("Total size of input and output dimension "
                         "mismatch.")

    if tensor.const is not None:
        # reshape const
        np_val = np.reshape(tensor.const, shape_o)
        return ng.constant(np_val,
                           make_pos_axes(np_val.shape)).named(tf_node.name)

    ndims_o = len(shape_o)
    if ndims_o not in (1, 2):
        raise NotImplementedError("Reshape can only support flatten "
                                  "to 1d or 2d.")
    if ndims_o == 1:
        tensor = ng.flatten(tensor)
    else:
        # The first output length must equal the product of a leading run
        # of input lengths; that run is what gets flattened together.
        cumprods = list(np.cumprod(shape_i))
        if shape_o[0] not in cumprods:
            raise ValueError("Output shape {} is not a flatten of input "
                             "shape {}.".format(shape_o, tuple(shape_i)))
        flatten_at_idx = cumprods.index(shape_o[0]) + 1
        tensor = ng.flatten_at(tensor, flatten_at_idx)
    res = ng.cast_axes(tensor, make_pos_axes(shape_o))
    return res.named(tf_node.name)
def test_rnn_deriv_ref(sequence_length, input_size, hidden_size, batch_size, return_sequence,
                       weight_initializer, bias_initializer, transformer_factory):
    """Compare parameter gradients of the ngraph RNN with the NumPy reference RNN."""
    assert batch_size == 1, "the recurrent reference implementation only support batch size 1"
    assert return_sequence is True, "the reference rnn only supports sequences for deriv"

    # Input placeholder together with the matching numpy array
    input_placeholder, input_value = make_placeholder(input_size, sequence_length, batch_size)

    # Network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer)

    # Reference numpy RNN
    reference_rnn = RefRecurrent(input_size, hidden_size, return_sequence=return_sequence)
    reference_rnn.set_weights(W_in, W_rec, b.reshape(reference_rnn.bh.shape))

    # Random deltas tensor for the gradient check
    deltas = np.random.randn(hidden_size, sequence_length, batch_size)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    time_major = [1, 0, 2]
    reference_grads = reference_rnn.lossFun(input_value.transpose(time_major),
                                            deltas.copy().transpose(time_major),
                                            init_states=init_state_value)[:3]
    dW_in, dW_rec, db = reference_grads

    # ngraph RNN and its forward pass
    ngraph_rnn = Recurrent(hidden_size, init=W_in, init_inner=W_rec, activation=Tanh(),
                           reset_cells=True, return_sequence=return_sequence)
    out_ng = ngraph_rnn.train_outputs(input_placeholder)
    deltas_constant = ng.constant(deltas, axes=out_ng.axes)

    trainable = [ngraph_rnn.W_input, ngraph_rnn.W_recur, ngraph_rnn.b]

    with ExecutorFactory() as ex:
        # Create all derivative computations first, then execute them
        gradient_funs = []
        for parameter in trainable:
            gradient = ng.deriv(out_ng, parameter, error=deltas_constant)
            gradient_funs.append(ex.executor(gradient, input_placeholder))

        for gradient_fun, reference in zip(gradient_funs, [dW_in, dW_rec, db]):
            ng.testing.assert_allclose(gradient_fun(input_value),
                                       reference.squeeze(),
                                       rtol=bprop_rtol, atol=bprop_atol)
def test_process_leak(transformer_factory):
    """Check that child processes started for a computation are gone afterwards."""
    children_before = active_children()

    with ng.metadata(device_id='2'):
        x = ng.constant(2)
    # Building the graph alone must not start any process.
    assert len(active_children()) == 0

    with ExecutorFactory() as ex:
        computation = ex.executor(x)
        assert len(active_children()) == 1
        computation()
        assert len(active_children()) == 2

    # Leaving the factory context has to bring the count back to the baseline.
    assert len(active_children()) == len(children_before)
def run_training(self, in_obj, init_states, **kwargs):
    """Feed `in_obj` through every layer in order and return the final output.

    Layer number `i` receives `init_states[i]` as `init_state` when such an
    entry exists; the remaining layers are called without an initial state.
    For the 'LSTM' cell type each state is first paired with a zero constant
    defined over the same axes.
    """
    if self.celltype == 'LSTM':
        init_states = [(state, ng.constant(0., state.axes)) for state in init_states]

    output = in_obj
    for index, layer in enumerate(self.layers):
        has_state = index < len(init_states)
        if not has_state:
            output = layer(output, **kwargs)
            continue
        output = layer(output, init_state=init_states[index], **kwargs)
    return output
def test_adaptive_avg_pool():
    """Run adaptive average pooling to 3 outputs on a (2, 3, 7) input."""
    runtime = get_runtime()

    input_values = [
        0.0, 4, 1, 3, -2, -5, -2,
        -2, 1, -3, 1, -3, -4, 0,
        -2, 1, -1, -2, 3, -1, -3,
        -1, -2, 3, 4, -3, -4, 1,
        2, 0, -4, -5, -2, -2, -3,
        2, 3, 1, -5, 2, -4, -2
    ]
    expected_values = [
        1.66666663, 0.66666669, -3.,
        -1.33333337, -1.66666663, -2.33333325,
        -0.66666669, 0., -0.33333334,
        0., 1.33333337, -2.,
        -0.66666669, -3.66666675, -2.33333325,
        2., -0.66666669, -1.33333337
    ]

    input_tensor = ng.constant(np.reshape(input_values, (2, 3, 7)))
    output_shape = ng.constant(np.array([3], dtype=np.int32))
    pool_node = ng.adaptive_avg_pool(input_tensor, output_shape)

    pool_results = runtime.computation(pool_node)()
    expected_results = np.reshape(expected_values, (2, 3, 3))

    assert np.allclose(pool_results, expected_results)
def test_scalar_broadcast():
    """
    Test broadcasting a scalar into a tensor
    """
    with ExecutorFactory() as ex:
        scalar_axes = ng.make_axes()
        target_axes = ng.make_axes([ng.make_axis(2), ng.make_axis(3)])

        scalar = ng.constant(1., axes=scalar_axes)
        broadcasted = ng.broadcast(scalar, axes=target_axes)

        computation = ex.executor(broadcasted)
        result = computation()
        assert np.array_equal(result, np.ones(target_axes.lengths))
def test_tensor_sum_single_reduction_axes(transformer_factory):
    """Sum a 2x2 tensor of ones over a single reduction axis."""
    Y = ng.make_axis(length=2)
    N = ng.make_axis(length=2)

    ones = np.array([[1.0, 1.0], [1.0, 1.0]], dtype='float32')
    tensor = ng.constant(ones, [N, Y])
    reduced = ng.sum(tensor, reduction_axes=Y)

    with executor(reduced) as ex:
        result = ex()

    ng.testing.assert_allclose(result, [2.0, 2.0])
def test_evaluation_twice():
    """Test executing a computation graph twice on a one layer MLP."""
    C = ng.make_axis(length=2)
    D = ng.make_axis(length=2)
    W = ng.make_axis(length=1)

    x_values = np.array([[1, 2], [3, 4]], dtype='float32')
    weight_values = np.array([[1], [1]], dtype='float32')
    bias_values = np.array([[2], [2]], dtype='float32')

    x = ng.constant(x_values, ng.make_axes([C, D]))
    hidden1_weights = ng.constant(weight_values, ng.make_axes([C, W]))
    hidden1_biases = ng.constant(bias_values, ng.make_axes([D, W]))

    hidden1 = ng.dot(hidden1_weights, x) + hidden1_biases

    with executor(hidden1) as comp:
        first_run = comp()
        second_run = comp()

    assert np.array_equal(first_run, second_run)
def test_broadcast_deriv_reorder(transformer_factory):
    """Differentiate a sum over a broadcast that swaps the axis order."""
    H = ng.make_axis(2)
    W = ng.make_axis(3)

    x = ng.constant(np.random.rand(2, 3), axes=[H, W])
    reordered = ng.broadcast(x, [W, H])
    total = ng.sum(reordered, out_axes=())
    gradient = ng.deriv(total, x)

    expected = np.ones((2, 3))
    with ExecutorFactory() as ex:
        gradient_fun = ex.executor(gradient)
        ng.testing.assert_allclose(gradient_fun(), expected)
def test_cputensor_mlp(transformer_factory):
    """Evaluate ``x . w + b`` on constants and compare with the NumPy result."""
    D = ng.make_axis(length=3)
    H = ng.make_axis(length=2)
    N = ng.make_axis(length=1)

    x_values = np.array([[1, 2, 3]], dtype=np.float32)
    w_values = np.array([[1, 1], [1, 1], [1, 1]], dtype=np.float32)
    b_values = np.array([1, 2], dtype=np.float32)
    expected = np.dot(x_values, w_values) + b_values

    x = ng.constant(x_values, [N, D])
    w = ng.constant(w_values, [D, H])
    b = ng.constant(b_values, [H])
    affine = ng.dot(x, w) + b

    with executor(affine) as ex:
        result = ex()

    print(result)
    print(expected)
    assert np.array_equal(result, expected)
def test_kernel_cache(transformer_factory):
    """Add a (32, 32) tensor to two (32, 32, 16384) tensors and compare with NumPy."""
    X = ng.make_axis(32)
    Y = ng.make_axis(32)
    C = ng.make_axis(16384)
    axes = ng.make_axes([X, Y])
    bcast_axes = ng.make_axes([X, Y, C])

    # Drawn in this order: x, y, z.
    x_val = np.absolute(np.random.randn(*axes.lengths))
    y_val = np.absolute(np.random.randn(*bcast_axes.lengths))
    z_val = np.absolute(np.random.randn(*bcast_axes.lengths))

    x = ng.constant(x_val, axes)
    y = ng.constant(y_val, bcast_axes)
    z = ng.constant(z_val, bcast_axes)
    total = ng.add(ng.add(x, y), z)

    with executor(total) as ex:
        graph_val = ex()

    x_expanded = x_val.reshape(32, 32, 1)
    np_val = np.add(np.add(x_expanded, y_val), z_val)
    np.testing.assert_allclose(graph_val, np_val, rtol=1e-4)
def test_4d_elementwise(transformer_factory, input_axes):
    """Add two random tensors defined over `input_axes` and compare with NumPy."""
    # Limiting maximum absolute value for tensors elements to 7.9.
    # See description in function test_exit_condition above
    if is_flex_factory(transformer_factory):
        clip_val = 7.9
    else:
        clip_val = 0

    x_val = rng.randn_abs_clip(input_axes, clip_max=clip_val)
    y_val = rng.randn_abs_clip(input_axes, clip_max=clip_val)

    total = ng.add(ng.constant(x_val, input_axes), ng.constant(y_val, input_axes))

    with executor(total) as ex:
        graph_val = ex()

    np_val = np.add(x_val, y_val)
    ng.testing.assert_allclose(graph_val, np_val, rtol=1e-4)
def test_adaptive_max_pool():
    """Run adaptive max pooling to 3 outputs and check both values and indices."""
    runtime = get_runtime()

    input_values = [
        0, 4, 1, 3, -2, -5, -2,
        -2, 1, -3, 1, -3, -4, 0,
        -2, 1, -1, -2, 3, -1, -3,
        -1, -2, 3, 4, -3, -4, 1,
        2, 0, -4, -5, -2, -2, -3,
        2, 3, 1, -5, 2, -4, -2
    ]
    pooled_shape = (2, 3, 3)

    input_tensor = ng.constant(np.reshape(input_values, (2, 3, 7)))
    output_shape = ng.constant(np.array([3], dtype=np.int32))
    pool_node = ng.adaptive_max_pool(input_tensor, output_shape)

    pool_results = runtime.computation(pool_node)()

    expected_results = np.reshape(
        [4, 3, -2, 1, 1, 0, 1, 3, 3, 3, 4, 1, 2, -2, -2, 3, 2, 2], pooled_shape)
    expected_indices = np.reshape(
        [1, 3, 4, 1, 3, 6, 1, 4, 4, 2, 3, 6, 0, 4, 4, 1, 4, 4], pooled_shape)

    assert np.allclose(pool_results, [expected_results, expected_indices])
def ReduceMean(onnx_node, ng_inputs):  # type: (NodeWrapper, List[NgraphNode]) -> NgraphNode
    """Compute the mean value of the input tensor's elements along the provided axes.

    The mean is built as the reduced sum divided by the number of elements that
    take part in each reduction.
    """
    data = ng_inputs[0]
    input_shape = list(data.shape)

    sum_node = make_reduction_op(ng.sum, onnx_node, data)
    reduction_axes = get_reduction_axes(onnx_node, data)

    # Number of elements that are averaged into each output element.
    reduced_lengths = [input_shape[axis] for axis in reduction_axes]
    avg_elem_count = np.prod(reduced_lengths)

    count_constant = ng.constant(avg_elem_count, get_dtype(sum_node.get_element_type()))
    const_node = ng.broadcast_to(count_constant, sum_node.shape)
    return ng.divide(sum_node, const_node)
def test_convolution_backprop_data():
    """Run a 3x3 deconvolution of a 7x7 image to a 9x9 output and check the result."""
    runtime = get_runtime()

    output_spatial_shape = [9, 9]
    filter_shape = [1, 1, 3, 3]
    data_shape = [1, 1, 7, 7]
    strides = [1, 1]

    data_node = ng.parameter(shape=data_shape)
    filter_node = ng.parameter(shape=filter_shape)
    output_shape_node = ng.constant(np.array(output_spatial_shape, dtype=np.int64))

    deconvolution = ng.convolution_backprop_data(data_node, filter_node, strides,
                                                 output_shape_node)

    # All seven rows of the input image are identical.
    image_row = [-20, -20, 20, 20, 0, 0, 0]
    input_data = np.array([[[list(image_row) for _ in range(7)]]], dtype=np.float32)

    filter_data = np.array(
        [[1.0, 0.0, -1.0], [2.0, 0.0, -2.0], [1.0, 0.0, -1.0]],
        dtype=np.float32,
    ).reshape(1, 1, 3, 3)

    expected = np.array(
        [[[
            [-20.0, -20.0, 40.0, 40.0, -20.0, -20.0, 0.0, 0.0, 0.0],
            [-60.0, -60.0, 120.0, 120.0, -60.0, -60.0, 0.0, 0.0, 0.0],
            [-80.0, -80.0, 160.0, 160.0, -80.0, -80.0, 0.0, 0.0, 0.0],
            [-80.0, -80.0, 160.0, 160.0, -80.0, -80.0, 0.0, 0.0, 0.0],
            [-80.0, -80.0, 160.0, 160.0, -80.0, -80.0, 0.0, 0.0, 0.0],
            [-80.0, -80.0, 160.0, 160.0, -80.0, -80.0, 0.0, 0.0, 0.0],
            [-80.0, -80.0, 160.0, 160.0, -80.0, -80.0, 0.0, 0.0, 0.0],
            [-60.0, -60.0, 120.0, 120.0, -60.0, -60.0, 0.0, 0.0, 0.0],
            [-20.0, -20.0, 40.0, 40.0, -20.0, -20.0, 0.0, 0.0, 0.0],
        ]]],
        dtype=np.float32,
    )

    model = runtime.computation(deconvolution, data_node, filter_node)
    result = model(input_data, filter_data)
    assert np.allclose(result, expected)
def test_conv_flatten_deriv(transformer_factory):
    """
    Test deriv of conv followed by flatten
    """
    # set shape
    C, D, H, W, N = (3, 1, 28, 28, 8)
    C, T, R, S, K = (3, 1, 5, 5, 32)

    # input, filter and output axes
    ax_i = ng.make_axes([ax.C, ax.D, ax.H, ax.W, ax.N])
    ax_f = ng.make_axes([ax.C, ax.T, ax.R, ax.S, ax.K])
    ax_o = ng.make_axes([
        ng.make_axis(32, roles=[ar.Channel]),
        ng.make_axis(1, roles=[ar.Depth]),
        ng.make_axis(24, roles=[ar.Height]),
        ng.make_axis(24, roles=[ar.Width]),
        ax.N
    ])
    ax_i.set_shape((C, D, H, W, N))
    ax_f.set_shape((C, T, R, S, K))

    conv_params = dict(pad_d=0, pad_h=0, pad_w=0, str_d=1, str_h=1, str_w=1)

    axes_rsck = ng.make_axes([ax.R, ax.S, ax.C, ax.K])
    # Fresh axes with the same lengths, used to hold the variable itself.
    axes_rsck_prime = ng.make_axes([ng.make_axis(length) for length in axes_rsck.lengths])

    # broadcast input / filter axes
    image = ng.constant(np.ones(ax_i.lengths), ax_i)
    filter_var = ng.variable(axes_rsck_prime, initial_value=np.ones((R, S, C, K)))
    filter_casted = ng.cast_axes(filter_var, axes_rsck)
    filter_casted = ng.expand_dims(filter_casted, ax.T, 0)
    filter_casted = ng.axes_with_order(filter_casted, axes=ax_f)

    # convolution
    output = ng.convolution(conv_params, image, filter_casted, axes=ax_o)
    oC, oD, oH, oW, oN = output.axes
    output = ng.axes_with_order(output, axes=ng.make_axes([oN, oD, oH, oW, oC]))

    # slice away the oD
    keep_all = slice(None)
    out_slicing = [keep_all, 0, keep_all, keep_all, keep_all]
    conv = ng.Slice(output, out_slicing)
    flatten = ng.flatten_at(conv, idx=1)

    # cost and grad
    cost = ng.sum(flatten, reduction_axes=flatten.axes)
    grad = ng.deriv(cost, filter_var)

    # compute
    conv_grad_comp = executor([conv, grad])
    conv_val, grad_val = conv_grad_comp()

    assert np.allclose(conv_val, np.zeros_like(conv_val) + 75.)
    assert np.allclose(grad_val, np.zeros_like(grad_val) + 4608.)
def position_encoding(sentence_axis, embedding_axis):
    """
    Position Encoding used by the end to end memory network algorithms

    Builds a float32 matrix over (sentence_axis, embedding_axis) and wraps it
    in an ngraph constant. The entries for the last sentence position are all
    set to 1.
    """
    sentence_size = sentence_axis.length
    embedding_size = embedding_axis.length

    # Offsets of the 1-based embedding / sentence positions from the centre.
    embedding_offsets = np.arange(1, embedding_size + 1) - (embedding_size + 1) / 2
    sentence_offsets = np.arange(1, sentence_size + 1) - (sentence_size + 1) / 2

    # Entry [i - 1, j - 1] is the product of the i-th and j-th offsets.
    encoding = np.outer(embedding_offsets, sentence_offsets).astype(np.float32)
    encoding = 1 + 4 * encoding / embedding_size / sentence_size

    # Make position encoding of time words identity to avoid modifying them
    encoding[:, -1] = 1.0

    return ng.constant(np.transpose(encoding), axes=[sentence_axis, embedding_axis])
def __init__(
        self,
        cands,
        num_cands,
        max_cand_len,
        memory_size,
        max_utt_len,
        vocab_size,
        emb_size,
        batch_size,
        use_match_type=False,
        kb_ents_to_type=None,
        kb_ents_to_cand_idxs=None,
        match_type_idxs=None,
        nhops=3,
        eps=1e-6,
        init=GaussianInit(mean=0.0, std=0.1)):
    """Store the configuration and create axes, lookup tables and projection.

    When `use_match_type` is false, the candidate answers are additionally
    stored as a constant matrix over the candidate axes.
    """
    super(MemN2N_Dialog, self).__init__()

    # Candidate related settings
    self.cands = cands
    self.num_cands = num_cands
    self.max_cand_len = max_cand_len

    # Sizes
    self.memory_size = memory_size
    self.max_utt_len = max_utt_len
    self.vocab_size = vocab_size
    self.batch_size = batch_size

    # Match type settings
    self.use_match_type = use_match_type
    self.kb_ents_to_type = kb_ents_to_type
    self.kb_ents_to_cand_idxs = kb_ents_to_cand_idxs
    self.match_type_idxs = match_type_idxs

    # Remaining hyper parameters
    self.nhops = nhops
    self.eps = eps
    self.init = init

    # Make axes
    self.batch_axis = ng.make_axis(length=batch_size, name='N')
    self.sentence_rec_axis = ng.make_axis(length=max_utt_len, name='REC')
    self.memory_axis = ng.make_axis(length=memory_size, name='memory_axis')
    self.embedding_axis = ng.make_axis(length=emb_size, name='F')
    self.embedding_axis_proj = ng.make_axis(length=emb_size, name='F_proj')
    self.cand_axis = ng.make_axis(length=num_cands, name='cand_axis')
    self.cand_rec_axis = ng.make_axis(length=max_cand_len, name='REC')

    # Weight sharing of A's across all hops input and output
    self.LUT_A = ModifiedLookupTable(vocab_size, emb_size, init, update=True, pad_idx=0)
    # Use lookuptable W to embed the candidate answers
    self.LUT_W = ModifiedLookupTable(vocab_size, emb_size, init, update=True, pad_idx=0)

    # Initialize projection matrix between internal model states
    projection_axes = [self.embedding_axis, self.embedding_axis_proj]
    self.R_proj = ng.variable(axes=projection_axes, initial_value=init)

    if self.use_match_type:
        return

    # Initialize constant matrix of all candidate answers
    candidate_axes = [self.cand_axis, self.cand_rec_axis]
    self.cands_mat = ng.constant(self.cands, axes=candidate_axes)
REC = ng.make_axis(length=max_question, name='REC') # Axis with length of hidden unit size F = ng.make_axis(length=hidden_size, name='F') # Axis with length of embedding size F_embed = ng.make_axis(length=300, name='F_embed') # Axis with length 1 dummy_axis = ng.make_axis(length=1, name='dummy_axis') # Axis with length of answer span span = ng.make_axis(length=2, name='span') # Set up drop out layer dropout_val = ng.slice_along_axis(inputs['dropout_val'], N, 0) dropout_1 = Dropout_Modified(keep=dropout_val) dropout_2 = Dropout_Modified(keep=dropout_val) drop_pointer = ng.maximum(dropout_val, ng.constant(const=0.8, axes=[])) dropout_3 = Dropout_Modified(keep=drop_pointer) dropout_4 = Dropout_Modified(keep=drop_pointer) # Constants required for masking const_LSTM = ng.constant(axes=[F, dummy_axis], const=1) const_loss = ng.constant(axes=[ax.Y, dummy_axis], const=1) const_LSTM_embed = ng.constant(axes=[F_embed, dummy_axis], const=1) # Create masks reorder_para_mask = ng.axes_with_order( inputs['para_len'], axes=[ dummy_axis, inputs['para_len'].axes[2], N]) reorder_ques_mask = ng.axes_with_order( inputs['question_len'], axes=[