def test_distributed_dot_parallel_second_axis(hetr_device): if hetr_device == 'gpu': pytest.xfail( "Axes Layout needs to be fixed for GPUs after changes to make\ parallel_axis the least contiguous axis for scatter/gather communication ops" ) H = ng.make_axis(length=6, name='height') N = ng.make_axis(length=8, name='batch') W1 = ng.make_axis(length=2, name='W1') W2 = ng.make_axis(length=4, name='W2') x = ng.placeholder(axes=[H, N]) w2 = ng.placeholder(axes=[W2, W1]) with ng.metadata(device=hetr_device, device_id=('0', '1'), parallel=N): w1 = ng.placeholder(axes=[W1, H]) dot1 = ng.dot(w1, x).named("dot1") dot2 = ng.dot(w2, dot1).named("dot2") np_x = np.random.randint(100, size=[H.length, N.length]) np_w1 = np.random.randint(100, size=[W1.length, H.length]) np_w2 = np.random.randint(100, size=[W2.length, W1.length]) with closing(ngt.make_transformer_factory( 'hetr', device=hetr_device)()) as transformer: computation = transformer.computation([dot2, dot1], x, w1, w2) res2, res1 = computation(np_x, np_w1, np_w2) np.testing.assert_array_equal(res1, np.dot(np_w1, np_x)) np.testing.assert_array_equal(res2, np.dot(np_w2, np.dot(np_w1, np_x))) computation2 = transformer.computation([dot1, dot2], x, w1, w2) res1, res2 = computation2(np_x, np_w1, np_w2) np.testing.assert_array_equal(res1, np.dot(np_w1, np_x)) np.testing.assert_array_equal(res2, np.dot(np_w2, np.dot(np_w1, np_x)))
def compare_optimizer_variable_select(opt_ng, opt_ref): # Set up data placeholders C = ng.make_axis(20) N = ng.make_axis(32, name='N') data = ng.placeholder([C, N]) target = ng.placeholder([N]) # params to be updated using optimizer to be tested np_W1 = np.random.rand(C.length) np_W2 = np.random.rand(C.length) W1 = ng.variable([C], initial_value=np_W1) W2 = ng.variable([C], initial_value=np_W2) # Set up op graph cost = ng.sum(target - ng.dot(W1, data) - ng.dot(W2, data), out_axis=()) updated_weights = ng.sequential([opt_ng(cost, variables=[W1]), W1]) # Set up the computation and run the "train" loop with ExecutorFactory() as ex: opt_ng_comp = ex.transformer.computation([updated_weights, W2], data, target) mock_dataset = data_generator(20, C.length, N.length) for x, y in mock_dataset: [ng_W1, ng_W2] = opt_ng_comp(x, y) # updated weights for ngraph optimizer np_W1 = opt_ref(x, np_W1) # updated weights for reference optimizer ng.testing.assert_allclose(np_W1, ng_W1, rtol=1e-3) ng.testing.assert_allclose(np_W2, ng_W2, rtol=1e-3)
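# Note: `data_generator` is referenced above but not defined in this snippet.
# A minimal sketch of what it might look like (hypothetical; modeled on the
# single-argument generator defined inline in the GDM test further below).
# It yields `iteration_count` random (data, target) pairs shaped to match the
# [C, N] and [N] placeholders:
def data_generator(iteration_count, c_length, n_length):
    for _ in range(iteration_count):
        yield (np.random.rand(c_length, n_length).astype('float32'),  # data   [C, N]
               np.random.rand(n_length).astype('float32'))            # target [N]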
def train_outputs(self, in_obj): """ Sets shape based parameters of this layer given an input tuple or int or input layer. Arguments: in_obj (int, tuple, Layer or Tensor): object that provides shape information for layer Returns: (Tensor): output """ in_axes = in_obj.axes self.time_axis = in_axes.recurrent_axes()[0] def get_steps(x, time_axis): return [ ng.slice_along_axis(x, time_axis, i) for i in range(time_axis.length) ] if self.axes is not None: hidden_axes = self.axes - self.axes.recurrent_axes() else: hidden_axes = ng.make_axes( [ng.make_axis(self.nout).named('Hidden_in')]) w_in_axes = hidden_axes + [ axis - 1 for axis in in_axes.sample_axes() - in_axes.recurrent_axes() ] w_re_axes = hidden_axes + [axis - 1 for axis in hidden_axes] self.W_input = ng.variable(axes=w_in_axes, initial_value=self.init( w_in_axes.lengths)).named("W_in") self.W_recur = ng.variable(axes=w_re_axes, initial_value=self.init_inner( w_re_axes.lengths)).named("W_re") self.b = ng.variable(axes=hidden_axes, initial_value=0).named("bias") h_ff_buf = ng.dot(self.W_input, in_obj).named("W_in_dot_in") h_ff_s = get_steps(h_ff_buf, self.time_axis) self.h_init = ng.constant(np.zeros(h_ff_s[0].axes.lengths), axes=h_ff_s[0].axes).named('h_init') hprev = [self.h_init] for i in range(self.time_axis.length): with ng.metadata(recurrent_step=str(i)): d = ng.dot(self.W_recur, hprev[i]).named("W_rec_dot_h{}".format(i)) h = self.activation(d + h_ff_s[i] + self.b) h.name = "activ{}".format(i) hprev.append(h) rnn_out = ng.stack(hprev[1:], self.time_axis, pos=1) return rnn_out
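# For reference, a plain-numpy sketch of the recurrence that `train_outputs`
# unrolls above: h_t = activation(W_re . h_{t-1} + W_in . x_t + b), with h_0 = 0
# and the per-step outputs stacked along a new time axis. Shapes below are
# positional and purely illustrative; the actual ops work on named axes.
def rnn_reference(x_steps, w_in, w_re, b, activation=np.tanh):
    """x_steps: list of [input_dim, batch] arrays, one per time step."""
    h = np.zeros((w_re.shape[0], x_steps[0].shape[1]))  # h_init: all zeros
    outputs = []
    for x_t in x_steps:
        h = activation(w_re.dot(h) + w_in.dot(x_t) + b[:, None])
        outputs.append(h)
    return np.stack(outputs, axis=1)  # hidden x time x batch, like ng.stack(..., pos=1)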
def __call__(self, inputs): query = ng.cast_axes(inputs['query'], [self.batch_axis, self.sentence_rec_axis]) # Query embedding [batch, sentence_axis, F] q_emb = self.LUT_A(query) # Multiply by position encoding and sum u_0 = ng.sum(q_emb * self.pos_enc, reduction_axes=[self.sentence_rec_axis]) # [batch, F] # Start a list of the internal states of the model. # Will be appended to after each memory hop u = [u_0] for hopn in range(self.nhops): keys = ng.cast_axes(inputs['keys'], [self.batch_axis, self.memory_axis, self.sentence_rec_axis]) value = ng.cast_axes(inputs['values'], [self.batch_axis, self.memory_axis, self.val_len_axis]) # Embed keys m_emb_A = self.LUT_A(keys) m_A = ng.sum(m_emb_A * self.pos_enc, reduction_axes=[self.sentence_rec_axis]) # [batch, memory_axis, F] # Compute scalar similarity between internal state and each memory # Equivalent to dot product between u[-1] and each memory in m_A dotted = ng.sum(u[-1] * m_A, reduction_axes=[self.embedding_axis]) probs = ng.softmax(dotted, self.memory_axis) # [batch, memory_axis] # Embed values with same embedding as keys, or new LUTs if self.use_v_luts: m_emb_C = self.LUTs_C[hopn](value) else: m_emb_C = self.LUT_A(value) m_C = ng.sum(m_emb_C * self.pos_enc, reduction_axes=[self.sentence_rec_axis]) # Compute weighted sum of output embeddings o_k = ng.sum(probs * m_C, reduction_axes=[self.memory_axis]) # [batch, F] u_k = u[-1] + o_k # [batch, F] # Add new internal state u.append(u_k) # Compute predicted answer from product of final internal state and final LUT weight matrix if self.use_v_luts: a_logits = ng.dot(self.LUTs_C[-1].W, u[-1]) # [batch, V] else: a_logits = ng.dot(self.LUT_A.W, u[-1]) # [batch, V] # rename V to vocab_axis to match answer a_logits = ng.cast_axes(a_logits, [self.vocab_axis, self.batch_axis]) a_pred = ng.softmax(a_logits, self.vocab_axis) return a_pred, a_logits
def Times(self, cntk_op, inputs): """ Returns input[0] x input[1] (matrix multiplication). Arguments: inputs: List of inputs to this node. Returns: A ngraph Op. """ cast_0, cast_1 = inputs if len(cast_0.axes) == 1 & len(cast_1.axes) == 1: pass elif len(cast_0.axes) == 1: temp = next((x for x in cast_1.axes if x.length == 1), None) if temp is None: temp = ng.make_axis(1) cast_0 = ng.broadcast(cast_0, [temp, cast_0.axes]) elif len(cast_1.axes) == 1: temp = next((x for x in cast_0.axes if x.length == 1), None) if temp is None: temp = ng.make_axis(1) cast_1 = ng.broadcast(cast_1, [ng.make_axis(1), cast_1.axes]) cast_0 = ng.cast_axes(cast_0, [cast_0.axes[0], cast_1.axes[0]]) return ng.dot(cast_0, cast_1).named(cntk_op.uid)
def test_one_dot_bprop_allreduce(config): c = config pytest.xfail( "GPU child transformers generate errors during AssignLayouts graph pass #1651" ) H_axis = ng.make_axis(length=4, name='height') W_axis = ng.make_axis(length=6, name='width') with ng.metadata(step='input'): X = ng.placeholder(axes=[H_axis, W_axis]) target = ng.constant(1, axes=[W_axis]) with ng.metadata(device_id=c['device_id'], parallel=W_axis): W = ng.variable(axes=[H_axis], initial_value=UniformInit(1, 1)) dot = ng.dot(W, X) L = ng.squared_L2(target - dot, out_axes=()) grad = ng.deriv(L, W) grad.metadata['reduce_func'] = c['func'] update = (W - grad) with closing(ngt.make_transformer_factory('hetr')()) as hetr: out_comp = hetr.computation([update], X) result = out_comp(c['input']) np.testing.assert_array_equal(result, c['expected_result'])
def MatMul(self, tf_node, inputs): """ Multiplies matrix `a` by matrix `b`. The inputs must be two-dimensional, the inner dimensions must match (possibly after transpose). Arguments: tf_node: NodeDef object, the tensorflow node to convert. inputs: List of ngraph Ops as inputs to this node. Returns: A ngraph Op corresponding to the tensorflow node. Inputs to tf_node: a, b, transpose_a, transpose_b, a_is_sparse, b_is_sparse, name """ # get inputs left, right = inputs if tf_node.attr['transpose_a'].b: left = ng.Transpose(left) if tf_node.attr['transpose_b'].b: right = ng.Transpose(right) # check shape assert len(left.axes) == len(right.axes) == 2 assert left.axes[1].length == right.axes[0].length # cast axis left_casted = ng.cast_axes(left, [left.axes[0], right.axes[0] - 1]) # result op result_op = ng.dot(left_casted, right, name=tf_node.name) # return return result_op
def Times(self, cntk_op, inputs): """ Returns input[0] x input[1] (matrix multiplication). Arguments: cntk_op: CNTK operation to be imported. inputs: List of inputs to this node. Returns: A ngraph Op. """ cast_0, cast_1 = inputs cast_0_len = len(cast_0.axes) cast_1_len = len(cast_1.axes) if cast_0_len == cast_1_len == 1: if cast_0.axes[0] != cast_1.axes[0]: cast_0 = ng.cast_axes(cast_0, cast_1.axes) elif cast_0_len == cast_1_len: if cast_0.axes[1].length == cast_1.axes[0].length: axes = [cast_0.axes[0], cast_1.axes[0]] axes.extend(cast_0.axes[2::]) cast_0 = ng.cast_axes(cast_0, axes=axes) else: axes = self._match_axes(cast_0.axes, cast_1.axes) cast_0 = ng.cast_axes(cast_0, axes=axes) elif cast_0_len > cast_1_len: axes = self._match_axes(cast_0.axes, cast_1.axes) cast_0 = ng.cast_axes(cast_0, axes=axes) else: axes = self._match_axes(cast_1.axes, cast_0.axes) cast_1 = ng.cast_axes(cast_1, axes=axes) return ng.dot(cast_0, cast_1).named(cntk_op.uid)
def test_flat_tensor_dot_tensor():
    """
    Ensure that a flattened argument axis is not unflattened in the result.
    """
    H = ng.make_axis(2)
    W = ng.make_axis(7)
    C = ng.make_axis(3)
    K = ng.make_axis(11)

    axes_a = ng.make_axes([H, W, C])
    a = ng.constant(np.ones(axes_a.lengths), axes=axes_a)
    flat_a = ng.flatten_at(a, 2)

    axes_b = ng.make_axes([C, K])
    b = ng.constant(np.ones(axes_b.lengths), axes=axes_b)

    result = ng.dot(b, flat_a)

    with ExecutorFactory() as factory:
        result_fun = factory.executor(result)
        result_val = result_fun()

    result_correct = np.ones_like(result_val) * C.length
    ng.testing.assert_allclose(result_val, result_correct)
def matmul(left, right, transpose_a=False, transpose_b=False, name=None): """ Only support 2d matmul for now. """ # Transpose if transpose_a: left = ng.Transpose(left) if transpose_b: right = ng.Transpose(right) # Check shape assert len(left.axes) == len(right.axes) == 2 assert left.axes[1].length == right.axes[0].length # step 1: cast left (pos_1, pos_0), right (pos_1, pos_0) => # left (temp , pos_1), right (pos_1, pos_0) # step 2: perform left dot right, result # (temp, pos_0) # step 3: cast back to (post_1, pos_0) left_temp_axes = ng.make_axes( [ng.make_axis(left.axes[0].length), right.axes[0]]) left = ng.cast_axes(left, axes=left_temp_axes) # Result op result_op = ng.dot(left, right).named(name) result_op = cast_to_pos_axes(result_op) # Return return result_op.named(name)
def test_gemm(transformer_factory): """ TODO: make this more interesting """ n, c = 32, 32 N = ng.make_axis(length=n, name='N') C = ng.make_axis(length=c) X = ng.placeholder(axes=[C, N]) Y = ng.placeholder(axes=[N]) W = ng.variable(axes=[C - 1], initial_value=0.1) Y_hat = ng.dot(W, X) with executor(Y_hat, X) as ex: mm_executor = ex w = np.ones(c) * 0.1 xs = np.ones(n * c).reshape(c, n) for ii in range(3): y_hat_val = mm_executor(xs) # 8.8 fixed point test # assert np.allclose(np.dot(xs, w) - y_hat_val, 0.075*np.ones(n)) # autoflex test assert_allclose(np.dot(xs, w), y_hat_val)
def Gemm(onnx_node, ng_inputs):  # type: (NodeWrapper, List[NgraphNode]) -> NgraphNode
    """Calculate general matrix multiplication Y = alpha * (A @ B) + beta * C."""
    input_a, input_b, input_c = ng_inputs
    alpha = onnx_node.get_attribute_value('alpha', 1)  # Scalar multiplier for A @ B
    beta = onnx_node.get_attribute_value('beta', 1)  # Scalar multiplier for input tensor C
    broadcast = onnx_node.get_attribute_value('broadcast', 1)  # Should C be broadcast?
    trans_a = onnx_node.get_attribute_value('transA', False)  # Should A be transposed?
    trans_b = onnx_node.get_attribute_value('transB', False)  # Should B be transposed?

    if trans_a:
        input_a = transpose(input_a)
    if trans_b:
        input_b = transpose(input_b)

    # onnx-tensorflow: https://github.com/onnx/onnx-tensorflow/
    # blob/17075f44c9071600beccfc62c92b22d1cd957bfd/onnx_tf/backend.py#L711
    # They hard-code flattening input `A` before transposition.
    #
    # First check whether the input shapes are compatible for matrix multiplication;
    # if not, try to flatten the inputs and check again.
    if not has_matmul_compatible_shapes(input_a.shape, input_b.shape):
        input_a = flatten_innermost_empty_dims(input_a)
        input_b = flatten_innermost_empty_dims(input_b)
        if not has_matmul_compatible_shapes(input_a.shape, input_b.shape):
            raise ValueError('Gemm node (%s): input "A" and "B" data shapes are incompatible to '
                             'multiply with each other.' % onnx_node.name)

    a_dot_b = ng.dot(input_a, input_b)

    if not broadcast and input_c.shape != a_dot_b.shape:
        raise ValueError('Gemm node (%s): input data shapes are incompatible and broadcast '
                         'was not requested!' % onnx_node.name)

    return alpha * a_dot_b + beta * input_c
def template_dot_one_placeholder_and_scalar(row, col, scalar, flex_exceptions, iters): arg_array = np.array([i for i in range(col * row)]).reshape(row, col) ng_placeholder = template_create_placeholder(row, col) ng_var = ng.placeholder(()) ng_fun = ng.dot(ng_var, ng_placeholder) flex_exceptions_index = 0 print("Initial scalar: ", scalar) print("Matrix:\n", arg_array) with executor(ng_fun, ng_var, ng_placeholder) as m_executor: for i in range(row): print("Iteration " + str(i + 1)) ng_op_out = m_executor(scalar, arg_array) np_op_out = np.dot(scalar, arg_array) # After each iteration matrix values are updated. arg_array = ng_op_out print("Flex dot product result: \n", ng_op_out) print("Numpy dot product result: \n", np_op_out) try: assert_allclose(ng_op_out, np_op_out) except AssertionError: print( "Flex dot product result doesn't match to numpy.\n" "Try to check if flex result is inside flex exceptions list" ) print("Flex dot product result: \n", ng_op_out) print("Current array inside flex exceptions list: \n", flex_exceptions[flex_exceptions_index]) assert_allclose(ng_op_out, flex_exceptions[flex_exceptions_index]) # Iterate to the next element of flex exceptions list flex_exceptions_index += 1
def Gemm(onnx_node, ng_inputs): # type: (NodeWrapper, List[NgraphNode]) -> NgraphNode """Calculate general matrix multiplication Y = alpha * (A @ B) + beta * C. Support is currently limited to 2D matrices only. Higher dimensional tensors will be flattened to 2D before multiplication. """ input_a, input_b, input_c = ng_inputs alpha = onnx_node.get_attribute_value('alpha', 1) # Scalar multiplier for A @ B beta = onnx_node.get_attribute_value( 'beta', 1) # Scalar multiplier for input tensor C trans_a = onnx_node.get_attribute_value('transA', False) # Should A be transposed? trans_b = onnx_node.get_attribute_value('transB', False) # Should B be transposed? if trans_a: input_a = transpose(input_a) if trans_b: input_b = transpose(input_b) input_a, input_b = reshape_for_matmul(onnx_node, input_a, input_b) a_dot_b = ng.dot(input_a, input_b) if alpha != 1: a_dot_b = alpha * a_dot_b if beta != 1: input_c = beta * input_c _, input_c = numpy_style_broadcast_for_binary_operation( onnx_node, [a_dot_b, input_c]) return a_dot_b + input_c
def test_reduce_vector(hetr_device): """ A whole vector is produced on each worker and should be reduced before being returned, but not along its axes since it does not have the parallel axis in its axes """ if hetr_device == 'gpu': pytest.xfail("broadcast communication ops not yet supported on gpus") H = ng.make_axis(length=4, name='height') N = ng.make_axis(length=8, name='batch') weight = ng.make_axis(length=2, name='weight') x = ng.placeholder(axes=[N, H]) w = ng.placeholder(axes=[H, weight]) with ng.metadata(device=hetr_device, device_id=('0', '1'), parallel=N): dot = ng.dot(x, w) out = ng.sum(dot, N) np_x = np.random.randint(100, size=[N.length, H.length]) np_weight = np.random.randint(100, size=[H.length, weight.length]) with closing(ngt.make_transformer_factory( 'hetr', device=hetr_device)()) as transformer: computation = transformer.computation(out, x, w) res = computation(np_x, np_weight) # TODO should the reduce infer a sum or mean? expected = np.sum(np.dot(np_x, np_weight), 0) / 2. np.testing.assert_array_equal(res, expected)
def Gemm(onnx_node, ng_inputs): # type: (NodeWrapper, List[TensorOp]) -> Op # Y = alpha * (A @ B) + beta * C input_a, input_b, input_c = ng_inputs alpha = onnx_node.get_attribute_value('alpha', 1) # Scalar multiplier for A @ B beta = onnx_node.get_attribute_value( 'beta', 1) # Scalar multiplier for input tensor C broadcast = onnx_node.get_attribute_value('broadcast', 1) # Should C be broadcast? trans_a = onnx_node.get_attribute_value('transA', False) # Should A be transposed? trans_b = onnx_node.get_attribute_value('transB', False) # Should B be transposed? if not broadcast: logger.warning( 'Gemm node (%s): import does not support broadcast value %s', onnx_node.name, broadcast) if trans_a: input_a = ng.Transpose(input_a) if trans_b: input_b = ng.Transpose(input_b) input_a, input_b = cast_axes_for_matmul(input_a, input_b) a_dot_b = ng.dot(input_a, input_b) a_dot_b = cast_to_pos_axes(a_dot_b) return alpha * a_dot_b + beta * input_c
def build_graphs(L, BS): """ TODO. Arguments: L: TODO BS: TODO Returns: TODO """ # Axes L = [ng.make_axis(length=N, name='L%d' % i) for i, N in enumerate(L)] BS = ng.make_axis(length=BS, name='BS') # Builds Network activations = [ng.tanh for i in range(len(L) - 2)] + [ng.softmax] X = ng.placeholder((L[0], BS)).named('X') Y = ng.placeholder((L[-1], )).named('Y') W = [ ng.variable((L_np1, L_n - 1)).named('W%d' % i) for i, (L_np1, L_n) in enumerate(zip(L[1:], L[:-1])) ] A = [] for i, f in enumerate(activations): Aim1 = A[i - 1] if i > 0 else X A.append(f(ng.dot(W[i], Aim1))) Error = ng.cross_entropy_multi(A[-1], Y) dW = [ng.deriv(Error, w) for w in W] transformer = ngt.make_transformer() dfg = an.DataFlowGraph(transformer, dW) ifg = an.InterferenceGraph(dfg.liveness()) return dfg, ifg
def FC(self, c2_op, inputs):
    """
    Fully connected layer: computes Y = X . W^T + b, i.e. multiplies the input
    by the transposed weight matrix and adds the bias to each output row.
    The inputs must be two-dimensional and their inner (shared) dimensions
    must match.

    Arguments:
        c2_op: OperatorDef object, the caffe2 node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the caffe2 node.

    Inputs to c2_op:
        X, W, b
    """
    # get inputs
    left, right, bias = inputs
    # check that the inner (contracted) dimensions match
    assert left.axes[1].length == right.axes[1].length
    # cast axis so the dot contracts over the shared dimension
    left_cast = ng.cast_axes(left, [left.axes[0], right.axes[1]])
    # dot op
    dot_op = ng.dot(left_cast, right)
    # cast bias axis
    bias_cast = ng.cast_axes(bias, [dot_op.axes[-1]])
    # result op
    result_op = ng.add(dot_op, bias_cast)

    return result_op
def test_evaluation_twice(transformer_factory):
    """Test executing a computation graph twice on a one layer MLP."""
    C = ng.make_axis(name='C')
    W = ng.make_axis(name='W')
    D = ng.make_axis(name='D')

    C.length = 2
    D.length = 2
    W.length = 1

    x = ng.constant(np.array([[1, 2], [3, 4]], dtype='float32'),
                    ng.make_axes([C, D]))

    hidden1_weights = ng.constant(np.array([[1], [1]], dtype='float32'),
                                  ng.make_axes([C - 1, W]))

    hidden1_biases = ng.constant(np.array([[2], [2]], dtype='float32'),
                                 ng.make_axes([D, W]))

    hidden1 = ng.dot(hidden1_weights, x) + hidden1_biases

    comp = executor(hidden1)
    result_1 = comp()
    result_2 = comp()
    assert np.array_equal(result_1, result_2)
def train_outputs(self, in_obj): out_axes = ng.make_axes(self.axes or [ng.make_axis(self.nout).named('Hidden')]) in_axes = in_obj.axes.sample_axes() in_axes = in_axes - in_axes.recurrent_axes() w_axes = out_axes - out_axes.recurrent_axes() + [axis - 1 for axis in in_axes] if self.W is None: self.W = ng.variable(axes=w_axes, initial_value=self.init(w_axes.lengths)) return ng.dot(self.W, in_obj)
def _step(self, inp, states):
    h_state = states[0]
    c_state = states[1]

    ifog = {
        k: ng.dot(self.W_input[k], inp) +
        ng.dot(self.W_recur[k], h_state) + self.b[k]
        for k in self.metadata['gates']
    }
    ifog_act = {
        k: self.activation(ifog[k]) if k == 'g' else self.gate_activation(ifog[k])
        for k in self.metadata['gates']
    }

    c = ifog_act['f'] * c_state + ifog_act['i'] * ifog_act['g']
    # apply the output gate to the activated cell state
    h = ifog_act['o'] * self.activation(c)
    return [h, c]
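# Plain-numpy sketch of the LSTM cell update performed by `_step` above.
# Sigmoid gate activation and tanh cell activation are assumed for illustration;
# the layer's configured `gate_activation`/`activation` may differ.
def lstm_step_reference(x, h_prev, c_prev, w_in, w_re, b, activation=np.tanh):
    """w_in, w_re, b are dicts keyed by gate name ('i', 'f', 'o', 'g');
    b entries are 1-D and broadcast across the batch dimension."""
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    pre = {k: w_in[k].dot(x) + w_re[k].dot(h_prev) + b[k][:, None]
           for k in ('i', 'f', 'o', 'g')}
    act = {k: (activation(v) if k == 'g' else sigmoid(v)) for k, v in pre.items()}
    c = act['f'] * c_prev + act['i'] * act['g']   # new cell state
    h = act['o'] * activation(c)                  # output gate on activated cell state
    return h, c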
def __call__(self, in_obj): if self.W is None: self.W = ng.variable( axes=ng.make_axes(self.axes_map.keys()) + in_obj.axes.feature_axes(), initial_value=self.init, scope=self.scope, ).named('LinW') # in the event that the in_obj feature axes and the output feature axes # share axis names, self.W will have duplicate axes, which are not # allowed. To get around this, we rename the output feature axes to # something unique that we can undo after the dot. This map_roles is # undoing this temporary axes name change. return ng.map_roles(ng.dot(self.W, in_obj), self.axes_map)
def test_flatten_deriv_simplified(): """ Test derivative with dot and flatten """ ax_N = ng.make_axis(length=3) ax_Y = ng.make_axis(length=2) x = ng.placeholder(ng.make_axes([ax_N])) w = ng.constant([5, 2], axes=ng.make_axes([ax_Y])) logits = ng.dot(x, w) cost = ng.sum(logits, reduction_axes=logits.axes) delta = 0.001 u = rng.uniform(.1, 5.0, x.axes) check_derivative(cost, x, delta, u, atol=1e-2, rtol=1e-2)
def test_distributed_dot(transformer_factory): H = ng.make_axis(length=4, name='height') N = ng.make_axis(length=8, name='batch') weight = ng.make_axis(length=2, name='weight') x = ng.placeholder(axes=[H, N]) w = ng.placeholder(axes=[weight, H]) with ng.metadata(device_id=('1', '2'), parallel=N): dot = ng.dot(w, x) np_x = np.random.randint(100, size=[H.length, N.length]) np_weight = np.random.randint(100, size=[weight.length, H.length]) with ExecutorFactory() as ex: computation = ex.executor(dot, x, w) res = computation(np_x, np_weight) np.testing.assert_array_equal(res, np.dot(np_weight, np_x))
def template_dot_two_placeholders(rows_1, col_1, col_2): ng_placeholder2, ng_placeholder1 = \ template_create_placeholders_for_multiplication(col_2, col_1, rows_1) ng_fun = ng.dot(ng_placeholder1, ng_placeholder2) arg_array1 = np.array([i for i in range(col_1 * rows_1) ]).reshape(rows_1, col_1) arg_array2 = np.array([i for i in range(col_1 * col_2) ]).reshape(col_1, col_2) print("Matrix 1:\n", arg_array1) print("Matrix 2:\n", arg_array2) with executor(ng_fun, ng_placeholder1, ng_placeholder2) as mm_executor: np_op_out = np.dot(arg_array1, arg_array2) ng_op_out = mm_executor(arg_array1, arg_array2) print("Flex dot product result: \n", ng_op_out) print("Numpy dot product result: \n", np_op_out) assert_allclose(ng_op_out, np_op_out)
def test_gdm(random_learning_rate, random_momentum_coef, wdecay, nesterov, transformer_factory): # Setup the baseline and reference optimizers to be tested gdm_args = { 'learning_rate': random_learning_rate, 'momentum_coef': random_momentum_coef, 'wdecay': wdecay, 'nesterov': nesterov } gdm_reference = GDMReference(**gdm_args) gdm = GradientDescentMomentum(**gdm_args) # Set up data placeholders C = ng.make_axis(20) N = ng.make_axis(32, name='N') data = ng.placeholder([C, N]) target = ng.placeholder([N]) # params to be updated using GDM np_W = np.random.rand(C.length) W = ng.variable([C], initial_value=np_W) # Set up op graph cost = ng.sum(target - ng.dot(W, data), out_axis=()) updated_weights = ng.sequential([gdm(cost), W]) def data_generator(iteration_count): for i in range(iteration_count): yield (np.random.rand(C.length, N.length).astype('float32'), np.random.rand(N.length).astype('float32')) # Set up the computation and run the "train" loop with ExecutorFactory() as ex: gdm_baseline = ex.transformer.computation(updated_weights, data, target) mock_dataset = data_generator(20) for x, y in mock_dataset: ng_W = gdm_baseline(x, y) # updated weights for ngraph optimizer np_W = gdm_reference( x, np_W) # updated weights for reference optimizer ng.testing.assert_allclose(np_W, ng_W, rtol=1e-3)
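# `GDMReference` is not shown in this snippet. For orientation, a hedged sketch
# of the classic gradient-descent-with-momentum update such a reference might
# apply (hypothetical; the real class computes the gradient of the test's linear
# cost itself, and its weight-decay/Nesterov conventions may differ):
def momentum_update(w, grad, velocity, learning_rate, momentum_coef,
                    wdecay=0.0, nesterov=False):
    """Return (new_weights, new_velocity) for one momentum step."""
    grad = grad + wdecay * w                              # fold L2 weight decay into the gradient
    velocity = momentum_coef * velocity - learning_rate * grad
    if nesterov:
        # Nesterov momentum: look one velocity step ahead before the gradient step
        return w + momentum_coef * velocity - learning_rate * grad, velocity
    return w + velocity, velocity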
def test_distributed_dot_parallel_second_axis(): pytest.xfail("'parallel' for not first axis isn't supported yet") H = ng.make_axis(length=4, name='height') N = ng.make_axis(length=8, name='batch') weight = ng.make_axis(length=2, name='weight') x = ng.placeholder(axes=[H, N]) w = ng.placeholder(axes=[weight, H]) with ng.metadata(device_id=('0', '1'), parallel=N): dot = ng.dot(w, x) np_x = np.random.randint(100, size=[H.length, N.length]) np_weight = np.random.randint(100, size=[weight.length, H.length]) with ExecutorFactory() as ex: computation = ex.executor(dot, x, w) res = computation(np_x, np_weight) np.testing.assert_array_equal(res, np.dot(np_weight, np_x))
def test_cputensor_dot(transformer_factory): Y = ng.make_axis(length=2) M = ng.make_axis(length=1) N = ng.make_axis(length=3) np_a = np.array([[1, 2, 3]], dtype=np.float32) np_b = np.array([[1, 2], [2, 3], [3, 4]], dtype=np.float32) np_c = np.dot(np_a, np_b) a = ng.constant(np_a, [M, N]).named('a') b = ng.constant(np_b, [N, Y]).named('b') c = ng.dot(a, b) with executor(c) as ex: result = ex() assert np.array_equal(result, np_c)
def _step(self, h_ff, states):
    h_state = states[0]
    c_state = states[1]

    ifog = {
        k: sum([ng.cast_role(h_ff[k], self.out_axes),
                ng.cast_role(ng.dot(self.W_recur[k], h_state), self.out_axes),
                self.b[k],
                ]) for k in self.metadata['gates']
    }
    ifog_act = {k: self.activation(ifog[k]) if k == 'g' else self.gate_activation(ifog[k])
                for k in self.metadata['gates']}

    c = ifog_act['f'] * c_state + ifog_act['i'] * ifog_act['g']
    # apply the output gate to the activated cell state
    h = ifog_act['o'] * self.activation(c)
    h = ng.cast_role(h, self.out_axes)
    return [h, c]
def test_recvop_axes_using_dot(transformer_factory):
    x_value = np.array([[1], [2]])
    w_value = np.array([[-1, 1]])

    A1 = ng.make_axis(length=1)
    A2 = ng.make_axis(length=2)
    A3 = ng.make_axis(length=2)

    x = ng.placeholder([A2, A1])
    w = ng.variable([A1, A3], initial_value=w_value)

    with ng.metadata(device_id='1'):
        result = ng.dot(x, w)

    with ExecutorFactory() as ex:
        computation = ex.executor(result, x, w)
        # compare the evaluated result against numpy rather than building an
        # (unevaluated, always-truthy) ng.equal op
        np.testing.assert_array_equal(computation(x_value, w_value),
                                      np.dot(x_value, w_value))
def test_comm_broadcast_op(hetr_device): if hetr_device == 'gpu': pytest.skip('gpu communication broadcast op is not supported.') H = ng.make_axis(length=4, name='height') N = ng.make_axis(length=8, name='batch') weight = ng.make_axis(length=2, name='weight') x = ng.placeholder(axes=[N, H]) # w will be broadcasted to devices w = ng.placeholder(axes=[H, weight]) with ng.metadata(device_id=('0', '1'), parallel=N): dot = ng.dot(x, w) np_x = np.random.randint(100, size=[N.length, H.length]) np_weight = np.random.randint(100, size=[H.length, weight.length]) with ExecutorFactory() as ex: computation = ex.executor(dot, x, w) res = computation(np_x, np_weight) np.testing.assert_array_equal(res, np.dot(np_x, np_weight))
def __call__(self, inputs): query = ng.cast_axes( inputs['user_utt'], [ self.batch_axis, self.sentence_rec_axis]) # Query embedding [batch, sentence_axis, F] q_emb = self.LUT_A(query) # Multiply by position encoding and sum u_0 = ng.sum(q_emb, reduction_axes=[self.sentence_rec_axis]) # Start a list of the internal states of the model. Will be appended to # after each memory hop u = [u_0] for hopn in range(self.nhops): story = ng.cast_axes( inputs['memory'], [ self.batch_axis, self.memory_axis, self.sentence_rec_axis]) # Re-use the query embedding matrix to embed the memory sentences # [batch, memory_axis, sentence_axis, F] m_emb_A = self.LUT_A(story) m_A = ng.sum( m_emb_A, reduction_axes=[ self.sentence_rec_axis]) # [batch, memory_axis, F] # Compute scalar similarity between internal state and each memory # Equivalent to dot product between u[-1] and each memory in m_A # [batch, memory_axis] dotted = ng.sum(u[-1] * m_A, reduction_axes=[self.embedding_axis]) # [batch, memory_axis] probs = ng.softmax(dotted, self.memory_axis) # Renormalize probabilites according to non-empty memories probs_masked = probs * inputs['memory_mask'] renorm_sum = ng.sum( probs_masked, reduction_axes=[ self.memory_axis]) + self.eps probs_renorm = (probs_masked + self.eps) / renorm_sum # Compute weighted sum of memory embeddings o_k = ng.sum( probs_renorm * m_A, reduction_axes=[ self.memory_axis]) # [batch, F] # Add the output back into the internal state and project u_k = ng.cast_axes(ng.dot(self.R_proj, o_k), [ self.embedding_axis, self.batch_axis]) + u[-1] # [batch, F_proj] # Add new internal state u.append(u_k) if self.use_match_type: # [batch_axis, cand_axis, cand_rec_axis, F] self.cands_mat = inputs['cands_mat'] # Embed all candidate responses using LUT_W # [<batch_axis>, cand_axis, cand_rec_axis, F] cand_emb_W = self.LUT_W(self.cands_mat) # No position encoding added yet cands_mat_emb = ng.sum( cand_emb_W, reduction_axes=[ self.cand_rec_axis]) # [<batch_axis>, cand_axis, F] # Compute predicted answer from product of final internal state # and embedded candidate answers # a_logits = ng.dot(cands_mat_emb, u[-1]) # [batch, cand_axis] # [batch, cand_axis] a_logits = ng.sum(u[-1] * cands_mat_emb, reduction_axes=[self.embedding_axis]) # rename V to vocab_axis to match answer a_logits = ng.cast_axes(a_logits, [self.batch_axis, self.cand_axis]) a_pred = ng.softmax(a_logits, self.cand_axis) return a_pred, probs_renorm
# Constants required for masking const_LSTM = ng.constant(axes=[F, dummy_axis], const=1) const_loss = ng.constant(axes=[ax.Y, dummy_axis], const=1) const_LSTM_embed = ng.constant(axes=[F_embed, dummy_axis], const=1) # Create masks reorder_para_mask = ng.axes_with_order( inputs['para_len'], axes=[ dummy_axis, inputs['para_len'].axes[2], N]) reorder_ques_mask = ng.axes_with_order( inputs['question_len'], axes=[ dummy_axis, inputs['question_len'].axes[2], N]) # Masks for question and para after encoding layer mask_para = ng.dot(const_LSTM, reorder_para_mask) mask_question = ng.dot(const_LSTM, ng.cast_axes(reorder_ques_mask, [dummy_axis, REC, N])) # Masks for question and para after embedding/LookupTable layer mask_para_embed = ng.dot(const_LSTM_embed, reorder_para_mask) mask_question_embed = ng.dot( const_LSTM_embed, ng.cast_axes( reorder_ques_mask, [ dummy_axis, REC, N])) # Pass question and para through embedding layer and dropout layers embed_output_para_1 = embed_layer(inputs['para']) embed_output_para = dropout_1(embed_output_para_1, keep=dropout_val) question_inps = ng.cast_axes(inputs['question'], [N, REC]) embed_output_ques_1 = embed_layer(question_inps)