def test_linear_ones(basic_linargs, transformer_factory):
    # basic sanity check with all ones on the inputs and weights: check that
    # each row in the output is the sum of the weights for that output. This
    # confirms that the correct number of operations is being run.
    nin, nout, batch_size = basic_linargs

    # set inputs
    N = ng.make_axis(batch_size, name="N", batch=True)
    F = ng.make_axis(nin, name="F")

    inp = ng.placeholder([F, N])
    layer = Linear(nout=nout, init=UniformInit(1.0, 1.0))
    fprop = layer.train_outputs(inp)

    # create data
    x = np.ones((nin, batch_size))

    # evaluate
    ngt.make_transformer()
    out, w = executor([fprop, layer.W], inp)(x)

    sums = np.sum(w, 1).reshape((nout, 1)) * np.ones((1, batch_size))

    assert np.allclose(sums, out, atol=0.0, rtol=0.0), \
        '%e' % np.max(np.abs(out - sums))
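# Illustrative sketch (plain NumPy, hypothetical shapes; not part of the test
# suite): with all-ones inputs, each output element reduces to the sum of the
# corresponding weight row, so a wrong contraction length would surface as a
# wrong constant in the comparison above.
import numpy as np

nin, nout, batch_size = 4, 3, 2
w = np.random.random((nout, nin))
x = np.ones((nin, batch_size))
out = w.dot(x)
sums = np.sum(w, 1).reshape((nout, 1)) * np.ones((1, batch_size))
assert np.allclose(out, sums)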
def test_linear_keep_batch_axis():
    feature_axis = ng.make_axis(1, name='A')
    batch_axis = ng.make_axis(2, name='N')
    x = ng.placeholder([batch_axis])
    linear = Linear(axes=feature_axis,
                    keep_axes=[batch_axis],
                    init=UniformInit(1.0, 1.0))(x)

    assert linear.axes == ng.make_axes([feature_axis, batch_axis])
def test_linear_axes_nout():
    feature_axis = ng.make_axis(1, name='A')
    batch_axis = ng.make_axis(2, name='N')
    x = ng.placeholder([feature_axis, batch_axis])
    linear = Linear(nout=3, init=UniformInit(1.0, 1.0))(x)

    assert feature_axis not in linear.axes
    assert batch_axis in linear.axes
    assert linear.axes.batch_axis().length == 2
    assert linear.axes.sample_axes().lengths == (3, )
def test_inference_reuse_linear(input_placeholder):
    layer = Linear(dummy_init, 10)
    layer(input_placeholder)
    train_params = (layer.W, )

    with Layer.inference_mode_on():
        layer(input_placeholder)
    inference_params = (layer.W, )

    for train_param, inference_param in zip(train_params, inference_params):
        assert train_param is inference_param
def test_linear_zeros(basic_linargs, transformer_factory):
    # basic sanity check with zero weights and random inputs
    nin, nout, batch_size = basic_linargs

    # set inputs
    N = ng.make_axis(batch_size, name="N", batch=True)
    F = ng.make_axis(nin, name="F")

    inp = ng.placeholder([F, N])
    layer = Linear(nout=nout, init=UniformInit(0.0, 0.0))
    fprop = layer.train_outputs(inp)

    # create data
    x = np.random.random((nin, batch_size))

    # evaluate
    ngt.make_transformer()
    out = executor(fprop, inp)(x)

    assert np.min(out) == 0.0 and np.max(out) == 0.0
def test_linear_zeros(input_placeholder, output_size):
    # basic sanity check with zero weights and random inputs
    x = np.random.random(input_placeholder.axes.lengths)
    layer = Linear(nout=output_size, init=UniformInit(0.0, 0.0))

    with ExecutorFactory() as ex:
        if ex.transformer.transformer_name == 'hetr':
            pytest.xfail("hetr fork-safe issue on mac")
        comp = ex.executor(layer(input_placeholder), input_placeholder)
        output_values = comp(x)

        assert np.min(output_values) == 0.0 and np.max(output_values) == 0.0
def test_linear_W_axes_nout():
    feature_axis = ng.make_axis(1, name='A')
    batch_axis = ng.make_axis(2, name='N')
    x = ng.placeholder([feature_axis, batch_axis])
    linear = Linear(nout=3, init=UniformInit(1.0, 1.0))
    linear(x)

    assert linear.W.axes.batch_axis() is None
    assert feature_axis in linear.W.axes
    assert len(linear.W.axes - feature_axis) == 1
    assert (linear.W.axes - feature_axis)[0].length == 3
def test_linear_ones(input_size, input_placeholder, output_size):
    # basic sanity check with all ones on the inputs and weights: check that
    # each row in the output is the sum of the weights for that output. This
    # confirms that the correct number of operations is being run.
    x = np.ones(input_placeholder.axes.lengths)
    layer = Linear(nout=output_size, init=UniformInit(1.0, 1.0))

    with ExecutorFactory() as ex:
        if ex.transformer.transformer_name == 'hetr':
            pytest.xfail("hetr fork-safe issue on mac")
        out = layer(input_placeholder)
        comp = ex.executor([out, layer.W], input_placeholder)
        output_values, w = comp(x)

        ng.testing.assert_allclose(np.ones(out.axes.lengths) * input_size,
                                   output_values, atol=0.0, rtol=0.0)
def test_linear_invalid_batch_axes():
    with pytest.raises(ValueError):
        Linear(axes=ng.make_axis(1, name='N'), init=UniformInit(1.0, 1.0))
def test_linear_invalid_shadow_axes():
    with pytest.raises(ValueError):
        Linear(axes=make_shadow_axis(ng.make_axis(1, name='A')),
               init=UniformInit(1.0, 1.0))
def test_linear_accepts_axes_axis():
    """
    Ensure that Linear.__init__ accepts an Axis as axes
    """
    Linear(axes=ng.make_axis(1), init=UniformInit(1.0, 1.0))
def __init__(self, params_dict, nout, init, init_h2h=None, bias_init=None,
             activation=None, gate_activation=None, batch_norm=False,
             reset_cells=True, **kwargs):
    super(MatchLSTMCell_withAttention, self).__init__(**kwargs)

    self.init = params_dict['init']
    max_question = params_dict['max_question']
    max_para = params_dict['max_para']
    hidden_size = nout

    # Axes
    # Axis for the length of the hidden units
    self.hidden_rows = ng.make_axis(length=hidden_size, name='hidden_rows')
    # Feature axis for the hidden units
    self.F = ng.make_axis(length=hidden_size, name='F')
    # Axis for the maximum question length
    self.hidden_cols_ques = ng.make_axis(length=max_question,
                                         name='hidden_cols_ques')
    # Axis with the length of the embedding size
    self.embed_axis = ng.make_axis(length=params_dict['embed_size'],
                                   name='embed_axis')
    # Recurrent axis for the maximum question length
    self.REC = ng.make_axis(length=max_question, name='REC')
    # Axis with length 1
    self.dummy_axis = ng.make_axis(length=1, name='dummy_axis')
    # Axis for the batch size
    self.N = ng.make_axis(length=params_dict['batch_size'], name='N')
    # Axis for the output of the match LSTM cell
    self.lstm_feature = ng.make_axis(length=2 * hidden_size,
                                     name='lstm_feature')
    # Length of the final classification layer (maximum length of the
    # paragraph)
    self.ax = params_dict['ax']
    self.ax.Y.length = max_para

    # Variables to be learnt during training (part of the attention
    # network); naming convention taken from the paper
    self.W_p = ng.variable(axes=[self.hidden_rows, self.F],
                           initial_value=self.init)
    self.W_q = ng.variable(axes=[self.hidden_rows, self.F],
                           initial_value=self.init)
    self.W_r = ng.variable(axes=[self.hidden_rows, self.F],
                           initial_value=self.init)
    self.b_p = ng.variable(axes=self.hidden_rows, initial_value=self.init)
    self.w_lr = ng.variable(axes=[self.hidden_rows], initial_value=self.init)

    # Constants for creating masks and initial hidden states
    self.e_q = ng.constant(axes=[self.dummy_axis, self.hidden_cols_ques],
                           const=np.ones([1, max_question]))
    self.e_q2 = ng.constant(axes=[self.F, self.dummy_axis], const=1)
    self.h_r_old = ng.constant(axes=[self.F, self.N], const=0)

    # Define constants for implementing the stacking operation; the default
    # stack op seems to be slow
    L1 = np.vstack((np.eye(hidden_size),
                    np.zeros([hidden_size, hidden_size])))
    L2 = np.vstack((np.zeros([hidden_size, hidden_size]),
                    np.eye(hidden_size)))
    self.ZX = ng.constant(const=L1, axes=[self.lstm_feature, self.F])
    self.ZY = ng.constant(const=L2, axes=[self.lstm_feature, self.F])

    # LSTM cell initialization (code from the standard LSTM cell in ngraph)
    self.nout = nout
    self.init = init
    self.init_h2h = init_h2h if init_h2h is not None else init
    self.bias_init = bias_init
    self.activation = activation
    if gate_activation is not None:
        self.gate_activation = gate_activation
    else:
        self.gate_activation = self.activation
    self.batch_norm = batch_norm
    self.reset_cells = reset_cells

    self.i2h = {}
    self.h2h = {}
    self.gate_transform = {}
    self.gate_output = {}
    for gate in self._gate_names:
        self.h2h[gate] = Linear(nout=self.nout, init=self.init_h2h[gate])
        self.i2h[gate] = Affine(axes=self.h2h[gate].axes,
                                weight_init=self.init[gate],
                                bias_init=self.bias_init[gate],
                                batch_norm=self.batch_norm)
        if gate == 'g':
            self.gate_transform[gate] = self.activation
        else:
            self.gate_transform[gate] = self.gate_activation
    self.out_axes = None
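# Illustrative sketch (plain NumPy, hypothetical hidden_size; not part of the
# cell): the constants L1 and L2 above implement stacking via matrix
# multiplication. L1.dot(x) places x in the top block and L2.dot(y) places y
# in the bottom block, so L1.dot(x) + L2.dot(y) equals vstack((x, y)).
import numpy as np

hidden_size = 2
L1 = np.vstack((np.eye(hidden_size), np.zeros([hidden_size, hidden_size])))
L2 = np.vstack((np.zeros([hidden_size, hidden_size]), np.eye(hidden_size)))
x = np.arange(hidden_size)
y = np.arange(hidden_size) * 10
assert np.array_equal(L1.dot(x) + L2.dot(y), np.concatenate((x, y)))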
def __init__(self, params_dict, nout, init, init_h2h=None, bias_init=None,
             activation=None, gate_activation=None, batch_norm=False,
             reset_cells=True, **kwargs):
    super(AnswerPointer_withAttention, self).__init__(**kwargs)

    self.init_axes = params_dict['init']
    max_question = params_dict['max_question']
    max_para = params_dict['max_para']
    hidden_size = nout

    # Axes
    # Axis for the length of the hidden units
    self.hidden_rows = ng.make_axis(length=hidden_size, name='hidden_rows')
    # Axis for the maximum paragraph length
    self.hidden_cols_para = ng.make_axis(length=max_para,
                                         name='hidden_cols_para')
    # Feature axis for the hidden units
    self.F = ng.make_axis(length=hidden_size, name='F')
    # Recurrent axis for the maximum question length
    self.REC = ng.make_axis(length=max_question, name='REC')
    # Axis with length 1
    self.dummy_axis = ng.make_axis(length=1, name='dummy_axis')
    # Axis for the batch size
    self.N = ng.make_axis(length=params_dict['batch_size'], name='N')
    # Axis with twice the length of the hidden size
    self.lstm_feature_new = ng.make_axis(length=2 * hidden_size,
                                         name='lstm_feature')

    self.ax = params_dict['ax']
    # Length of the final classification layer (maximum length of the
    # paragraph)
    self.ax.Y.length = max_para

    # Variables
    self.V_answer = ng.variable(axes=[self.hidden_rows,
                                      self.lstm_feature_new],
                                initial_value=self.init_axes)
    self.W_a = ng.variable(axes=[self.hidden_rows, self.F],
                           initial_value=self.init_axes)
    self.b_a = ng.variable(axes=self.hidden_rows,
                           initial_value=self.init_axes)
    self.e_q = ng.constant(axes=[self.dummy_axis, self.hidden_cols_para],
                           const=np.ones([1, max_para]))
    self.e_q2 = ng.constant(axes=[self.lstm_feature_new, self.dummy_axis],
                            const=1)
    self.v_lr = ng.variable(axes=[self.hidden_rows],
                            initial_value=self.init_axes)
    self.W_RNNx = ng.variable(axes=[self.hidden_rows, self.F],
                              initial_value=self.init_axes)
    self.W_RNNh = ng.variable(axes=[self.hidden_rows, self.F],
                              initial_value=self.init_axes)

    # LSTM cell initialization
    self.nout = nout
    self.init = init
    self.init_h2h = init_h2h if init_h2h is not None else init
    self.bias_init = bias_init
    self.activation = activation
    if gate_activation is not None:
        self.gate_activation = gate_activation
    else:
        self.gate_activation = self.activation
    self.batch_norm = batch_norm
    self.reset_cells = reset_cells

    self.i2h = {}
    self.h2h = {}
    self.gate_transform = {}
    self.gate_output = {}
    for gate in self._gate_names:
        self.h2h[gate] = Linear(nout=self.nout, init=self.init_h2h[gate])
        self.i2h[gate] = Affine(axes=self.h2h[gate].axes,
                                weight_init=self.init[gate],
                                bias_init=self.bias_init[gate],
                                batch_norm=self.batch_norm)
        if gate == 'g':
            self.gate_transform[gate] = self.activation
        else:
            self.gate_transform[gate] = self.gate_activation
    self.out_axes = None
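# Illustrative sketch (plain NumPy, hypothetical sizes; not part of the
# cell): the all-ones constants such as e_q above are presumably used to
# tile a column vector across every paragraph position via a matrix product,
# a common trick for broadcasting bias terms in an attention network.
import numpy as np

max_para = 4
b = np.arange(3.0).reshape(3, 1)   # a bias column
e_q = np.ones([1, max_para])       # all-ones row, as in e_q above
assert np.array_equal(b.dot(e_q), np.repeat(b, max_para, axis=1))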
def __init__(self):
    super(LinearLayer, self).__init__()
    self.layer = Linear(ConstantInit(0.0), nout=10)