# Imports these tests rely on (numpy plus neon's LookupTable layer and GlorotUniform init).
import numpy as np

from neon import NervanaObject
from neon.initializers.initializer import GlorotUniform
from neon.layers.layer import LookupTable


def test_lookuptable_ones_error(backend_default, basic_linargs):
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(
        vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)
    inp = np.random.random_integers(0, vocab_size - 1, size=nin * batch_size)
    layer.configure(nin)
    layer.allocate()
    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    # each output column must equal the embedding column selected by that input id
    for i in range(nin * batch_size):
        assert np.all(W[:, inp[i]] == out[:, i])

    err = dtypeu(np.ones((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).asnumpyarray()

    dw = layer.dW.asnumpyarray()
    unqidx, count = np.unique(inp, return_counts=True)
    dw_exp = np.zeros((1, nout))
    # with an all-ones error, each word's gradient is its occurrence count times err[:, 0]
    for wrd_id, cnt in zip(unqidx, count):
        dw_exp = err[:, 0] * cnt
        assert np.all(dw_exp == dw[:, wrd_id])

    return
def test_lookuptable_zeros_error(backend_default, basic_linargs):
    # basic sanity check: zero error deltas with random inputs should leave dW all zero
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(
        vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)
    inp = np.random.random_integers(0, vocab_size - 1, size=nin * batch_size)
    layer.configure(nin)
    layer.allocate()
    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    for i in range(nin * batch_size):
        assert np.all(W[:, inp[i]] == out[:, i])

    err = dtypeu(np.zeros((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).asnumpyarray()

    dw = layer.dW.asnumpyarray()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0

    return
def test_lookuptable_ones_error(backend_default, basic_linargs):
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(
        vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)
    inp = np.random.random_integers(0, vocab_size - 1, size=nin * batch_size)
    layer.configure(nin)
    layer.allocate()
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.set_deltas([layer.be.iobuf(nin)])
    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    # each output column must equal the embedding row selected by that input id
    for i in range(nin * batch_size):
        assert np.all(W[inp[i]].T == out[:, i])

    err = dtypeu(np.ones((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).get()

    dw = layer.dW.get()
    unqidx, count = np.unique(inp, return_counts=True)
    dw_exp = np.zeros((1, nout))
    # with an all-ones error, each word's gradient row is its occurrence count times err[:, 0]
    for wrd_id, cnt in zip(unqidx, count):
        dw_exp = err[:, 0] * cnt
        assert np.all(dw_exp == dw[wrd_id, :])

    return
def test_lookuptable_zeros_error(backend_default, basic_linargs):
    # basic sanity check: zero error deltas with random inputs should leave dW all zero
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(
        vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)
    inp = np.random.random_integers(0, vocab_size - 1, size=nin * batch_size)
    layer.configure(nin)
    layer.allocate()
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.set_deltas([layer.be.iobuf(nin)])
    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    for i in range(nin * batch_size):
        assert np.all(W[inp[i]].T == out[:, i])

    err = dtypeu(np.zeros((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).get()

    dw = layer.dW.get()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0

    return
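# The column-by-column fprop check in the tests above (comparing W[inp[i]].T against each
# output column) amounts to a plain fancy-indexed gather from the embedding table.  The
# helper below is a minimal, self-contained numpy sketch of that equivalence; it is
# illustrative only and does not call neon.
def _gather_equivalence_sketch():
    vocab_size, nout, ncols = 10, 4, 6
    W = np.random.random((vocab_size, nout))            # embedding table, one row per word id
    inp = np.random.randint(0, vocab_size, size=ncols)  # one word id per output column

    out = W[inp].T                      # gather rows, transpose to (nout, ncols) like fprop output
    for i in range(ncols):              # the loop form used in the tests above
        assert np.all(W[inp[i]].T == out[:, i])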
def test_lookuptable_rand_error(backend_default, basic_linargs):
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(
        vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)
    inp = np.random.random_integers(0, vocab_size - 1, size=nin * batch_size)
    layer.configure(nin)
    layer.allocate()
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.set_deltas([layer.be.iobuf(nin)])
    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    for i in range(nin * batch_size):
        assert np.all(W[inp[i]].T == out[:, i])

    err = dtypeu(np.random.random((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).get()

    dw = layer.dW.get()
    unqidx, count = np.unique(inp, return_counts=True)
    dw_exp = np.zeros((1, nout))
    for wrd_id, cnt in zip(unqidx, count):
        dw_exp[:] = 0
        cnt_exp = 0
        for i, w_id in enumerate(inp):
            if w_id == wrd_id:
                dw_exp[:] = dw_exp[:] + err[:, i]
                cnt_exp += 1
        assert np.allclose(dw[wrd_id, :], dw_exp, atol=0, rtol=1e-4)
        assert np.allclose(dw_exp, dw[wrd_id, :], atol=0, rtol=1e-4)
        assert cnt == cnt_exp

    return
def test_lookuptable_rand_error(backend_default, basic_linargs, deltas_buffer):
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)
    inp = np.random.random_integers(0, vocab_size - 1, size=nin * batch_size)
    layer.configure(nin)
    layer.allocate()
    layer.prev_layer = True  # Hack to force delta buffer allocation

    layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    layer.set_deltas(deltas_buffer)

    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    for i in range(nin * batch_size):
        assert np.all(W[inp[i]].T == out[:, i])

    err = dtypeu(np.random.random((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).get()

    dw = layer.dW.get()
    unqidx, count = np.unique(inp, return_counts=True)
    dw_exp = np.zeros((1, nout))
    for wrd_id, cnt in zip(unqidx, count):
        dw_exp[:] = 0
        cnt_exp = 0
        for i, w_id in enumerate(inp):
            if w_id == wrd_id:
                dw_exp[:] = dw_exp[:] + err[:, i]
                cnt_exp += 1
        assert np.allclose(dw[wrd_id, :], dw_exp, atol=0, rtol=1e-4)
        assert np.allclose(dw_exp, dw[wrd_id, :], atol=0, rtol=1e-4)
        assert cnt == cnt_exp

    return
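# The expected gradient that the ones/rand tests rebuild with explicit loops (sum the error
# columns of every occurrence of each word id) can be computed in one vectorized step with
# np.add.at.  This helper is only a sketch of that reference computation, not neon's own
# bprop; with an all-ones error it reduces to count * err[:, 0] per word, which is exactly
# what test_lookuptable_ones_error asserts.
def _expected_lookup_grad(inp, err, vocab_size):
    """Reference dW for a lookup table: dW[w] = sum of err[:, i] over all i with inp[i] == w."""
    nout = err.shape[0]
    dw_exp = np.zeros((vocab_size, nout), dtype=err.dtype)
    # np.add.at accumulates contributions for repeated indices instead of overwriting them
    np.add.at(dw_exp, inp, err.T)
    return dw_exp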
def __init__(self, sentence_length, token_vocab_size, pos_vocab_size=None,
             char_vocab_size=None, max_char_word_length=20,
             token_embedding_size=None, pos_embedding_size=None,
             char_embedding_size=None, num_labels=None,
             lstm_hidden_size=100, num_lstm_layers=1,
             use_external_embedding=None, dropout=0.5):

    init = GlorotUniform()

    # token input stream: lookup table, or pass-through when external embeddings are used
    tokens = []
    if use_external_embedding is None:
        tokens.append(LookupTable(vocab_size=token_vocab_size,
                                  embedding_dim=token_embedding_size,
                                  init=init,
                                  pad_idx=0))
    else:
        tokens.append(DataInput())
    tokens.append(Reshape((-1, sentence_length)))
    f_layers = [tokens]

    # add POS tag input
    if pos_vocab_size is not None and pos_embedding_size is not None:
        f_layers.append([
            LookupTable(vocab_size=pos_vocab_size,
                        embedding_dim=pos_embedding_size,
                        init=init,
                        pad_idx=0),
            Reshape((-1, sentence_length))
        ])

    # add Character RNN input
    if char_vocab_size is not None and char_embedding_size is not None:
        char_lut_layer = LookupTable(vocab_size=char_vocab_size,
                                     embedding_dim=char_embedding_size,
                                     init=init,
                                     pad_idx=0)
        char_nn = [
            char_lut_layer,
            TimeDistBiLSTM(char_embedding_size, init,
                           activation=Logistic(),
                           gate_activation=Tanh(),
                           reset_cells=True,
                           reset_freq=max_char_word_length),
            TimeDistributedRecurrentLast(timesteps=max_char_word_length),
            Reshape((-1, sentence_length))
        ]
        f_layers.append(char_nn)

    # merge the input streams, then stack the sentence-level BiLSTM and classifier
    layers = []
    if len(f_layers) == 1:
        layers.append(f_layers[0][0])
    else:
        layers.append(MergeMultistream(layers=f_layers, merge="stack"))
        layers.append(Reshape((-1, sentence_length)))

    layers += [
        DeepBiLSTM(lstm_hidden_size, init,
                   activation=Logistic(),
                   gate_activation=Tanh(),
                   reset_cells=True,
                   depth=num_lstm_layers),
        Dropout(keep=dropout),
        Affine(num_labels, init, bias=init, activation=Softmax())
    ]
    self._model = Model(layers=layers)
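# Hypothetical usage sketch -- `SequenceTagger` is a placeholder name for the class whose
# __init__ is shown above (the snippet does not include the class statement), and the
# vocabulary/embedding sizes are illustrative values only.  With both POS and character
# inputs supplied, the constructor builds three input streams (token lookup, POS lookup,
# character BiLSTM), merges them with MergeMultistream, and stacks DeepBiLSTM, Dropout
# and an Affine/Softmax classifier on top.
#
#     tagger = SequenceTagger(sentence_length=50,
#                             token_vocab_size=20000,
#                             token_embedding_size=100,
#                             pos_vocab_size=46,
#                             pos_embedding_size=25,
#                             char_vocab_size=85,
#                             char_embedding_size=25,
#                             max_char_word_length=20,
#                             num_labels=17,
#                             lstm_hidden_size=100,
#                             num_lstm_layers=2,
#                             dropout=0.5)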