def test_theano_fprop_matrix(self):
    """
    compare `fprop` results of the gpu `RowSlicingBlock` with the output
    of the theano `RowSlicingLayer` for matrices of row indexes
    """
    r = []
    for i in xrange(self.N):
        max_input_sequence_len = self.rng.random_integers(300)
        # The first trial uses the full width of the index matrix; later
        # trials draw the used width from [1, max_input_sequence_len].
        sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(max_input_sequence_len)
        embd_dim = self.rng.random_integers(10000)
        batch_size = self.rng.random_integers(500)
        output_dim = self.rng.random_integers(2000)
        W = self.get_orthogonal_matrix(embd_dim, output_dim)
        row_idxs = self.rng.randint(embd_dim, size=(batch_size, max_input_sequence_len)).astype(np.int32)

        # quagga side
        quagga.processor_type = 'gpu'
        qrow_idxs = Connector(Matrix.from_npa(row_idxs))
        qW = Connector(Matrix.from_npa(W))
        row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
        qW.fprop()
        # Narrow the index matrix to the columns used in this trial.
        qrow_idxs.ncols = sequence_len
        qrow_idxs.fprop()
        row_slicing_block.fprop()
        q_output = row_slicing_block.output.to_host()

        # theano side
        th_row_idxs = T.imatrix()
        row_slicing_layer = RowSlicingLayer(W)
        toutput = row_slicing_layer.get_output_expr(th_row_idxs)
        th_output = theano.function([th_row_idxs], toutput)(row_idxs)

        # Use a dedicated index so the outer trial counter `i` is not
        # overwritten by the comparison loop.
        for step in xrange(sequence_len):
            r.append(np.allclose(q_output[step], th_output[step]))

    self.assertEqual(sum(r), len(r))
def test_fprop_matrix(self):
    """
    check that `RowSlicingBlock.fprop` gives matching outputs on the
    gpu and cpu backends for matrices of row indexes
    """
    checks = []
    for trial in xrange(self.N):
        max_len = self.rng.random_integers(300)
        if trial == 0:
            # The very first trial uses every column of the index matrix.
            seq_len = max_len
        else:
            seq_len = self.rng.random_integers(max_len)
        n_embeddings = self.rng.random_integers(10000)
        n_rows, n_features = self.rng.random_integers(2000, size=2)
        weights = self.get_orthogonal_matrix(n_embeddings, n_features)
        indexes = self.rng.randint(n_embeddings, size=(n_rows, max_len)).astype(np.int32)

        per_backend = {}
        for backend in ['gpu', 'cpu']:
            quagga.processor_type = backend
            idx_connector = Connector(Matrix.from_npa(indexes))
            w_connector = Connector(Matrix.from_npa(weights))
            block = RowSlicingBlock(w_connector, idx_connector)
            w_connector.fprop()
            # Restrict the index matrix to the columns used in this trial.
            idx_connector.ncols = seq_len
            idx_connector.fprop()
            block.fprop()
            per_backend[backend] = block.output.to_host()

        # One verdict per pair of corresponding gpu/cpu outputs.
        checks.extend(np.allclose(on_gpu, on_cpu)
                      for on_gpu, on_cpu in izip(per_backend['gpu'], per_backend['cpu']))

    self.assertEqual(sum(checks), len(checks))
def test_theano_fprop_vector(self):
    """
    compare `fprop` results of the gpu `RowSlicingBlock` with the output
    of the theano `RowSlicingLayer` for a single column of row indexes
    """
    checks = []
    for _ in xrange(self.N):
        n_embeddings = self.rng.random_integers(10000)
        n_rows, n_features = self.rng.random_integers(2000, size=2)
        weights = self.get_orthogonal_matrix(n_embeddings, n_features)
        indexes = self.rng.randint(n_embeddings, size=(n_rows, 1)).astype(np.int32)

        # quagga side
        quagga.processor_type = 'gpu'
        idx_connector = Connector(Matrix.from_npa(indexes))
        w_connector = Connector(Matrix.from_npa(weights))
        block = RowSlicingBlock(w_connector, idx_connector)
        w_connector.fprop()
        idx_connector.fprop()
        block.fprop()
        quagga_result = block.output.to_host()

        # theano side: the layer takes an index vector, so feed column 0
        idx_var = T.ivector()
        layer = RowSlicingLayer(weights)
        output_expr = layer.get_output_expr(idx_var)
        compiled = theano.function([idx_var], output_expr)
        theano_result = compiled(indexes[:, 0])

        checks.append(np.allclose(quagga_result, theano_result))

    self.assertEqual(sum(checks), len(checks))
def test_bprop_vector(self):
    """
    check that, after one forward/backward pass through `RowSlicingBlock`
    and `SoftmaxCeBlock`, adding the backward matrix to `W` gives the same
    result on the gpu and cpu backends
    """
    checks = []
    for _ in xrange(self.N):
        n_embeddings = self.rng.random_integers(10000)
        n_rows, n_features = self.rng.random_integers(2000, size=2)
        weights = self.get_orthogonal_matrix(n_embeddings, n_features)
        indexes = self.rng.randint(n_embeddings, size=(n_rows, 1)).astype(np.int32)
        labels = self.rng.randint(n_features, size=(n_rows, 1)).astype(np.int32)
        w_device_id = 0

        updated_w = {}
        for backend in ['gpu', 'cpu']:
            quagga.processor_type = backend
            idx_connector = Connector(Matrix.from_npa(indexes))
            label_connector = Connector(Matrix.from_npa(labels))
            w_connector = Connector(Matrix.from_npa(weights), w_device_id)
            slicing_block = RowSlicingBlock(w_connector, idx_connector)
            loss_block = SoftmaxCeBlock(slicing_block.output, label_connector)

            # forward pass
            # NOTE(review): label_connector.fprop() is not called here,
            # unlike in test_theano_bprop_vector -- confirm it is intentional.
            w_connector.fprop()
            idx_connector.fprop()
            slicing_block.fprop()
            loss_block.fprop()

            # backward pass, then fold the backward matrix into W
            loss_block.bprop()
            slicing_block.bprop()
            w_connector.add(Context(), w_connector.backward_matrix)
            updated_w[backend] = w_connector.to_host()

        checks.append(np.allclose(updated_w['gpu'], updated_w['cpu']))

    self.assertEqual(sum(checks), len(checks))
def test_theano_bprop_matrix(self):
    """
    compare the `W` obtained after one forward/backward pass through the
    gpu `RowSlicingBlock` + sequenced `SoftmaxCeBlock` with the `W`
    obtained from the theano `RowSlicingLayer` + `SequentialSoftmaxLayer`
    loss gradient, for matrices of row indexes
    """
    r = []
    for i in xrange(self.N):
        max_input_sequence_len = self.rng.random_integers(300)
        if i == 0:
            # The first trial uses the full width of the index matrix.
            sequence_len = max_input_sequence_len
        else:
            # random_integers(low, high) raises ValueError when low > high,
            # which happened whenever max_input_sequence_len was drawn as 1.
            # Clamp the lower bound so the draw is always valid; for any
            # max_input_sequence_len >= 2 the call is unchanged.
            min_sequence_len = min(2, max_input_sequence_len)
            sequence_len = self.rng.random_integers(min_sequence_len, max_input_sequence_len)
        embd_dim = self.rng.random_integers(10000)
        batch_size = self.rng.random_integers(500)
        output_dim = self.rng.random_integers(2000)
        W = self.get_orthogonal_matrix(embd_dim, output_dim)
        row_idxs = self.rng.randint(embd_dim, size=(batch_size, max_input_sequence_len)).astype(np.int32)
        # One (batch_size, 1) label column per position of the index matrix.
        true_labels = [self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32) for _ in xrange(max_input_sequence_len)]
        device_id = 0

        # quagga side
        quagga.processor_type = 'gpu'
        qrow_idxs = Connector(Matrix.from_npa(row_idxs))
        qtrue_labels = List([Connector(Matrix.from_npa(e)) for e in true_labels], qrow_idxs.ncols)
        qW = Connector(Matrix.from_npa(W), device_id)
        row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
        seq_sce_block = SequencerBlock(block_class=SoftmaxCeBlock,
                                       params=[],
                                       sequences=[row_slicing_block.output, qtrue_labels])
        qW.fprop()
        # Narrow the index matrix to the columns used in this trial.
        qrow_idxs.ncols = sequence_len
        qrow_idxs.fprop()
        row_slicing_block.fprop()
        seq_sce_block.fprop()
        seq_sce_block.bprop()
        row_slicing_block.bprop()
        # Fold the accumulated backward matrix into W.
        qW.add(Context(), qW.backward_matrix)

        # theano side: apply the same W + dL/dW update
        th_row_idxs = T.imatrix()
        th_true_labels = T.imatrix()
        row_slicing_layer = RowSlicingLayer(W)
        toutput = row_slicing_layer.get_output_expr(th_row_idxs)
        loss = SequentialSoftmaxLayer.get_loss(toutput, th_true_labels)
        dL_dW = T.grad(loss, row_slicing_layer.W)
        fun = theano.function([th_row_idxs, th_true_labels],
                              updates=[(row_slicing_layer.W, row_slicing_layer.W + dL_dW)])
        # NOTE(review): theano receives the full-width row_idxs while the
        # labels are truncated to sequence_len -- presumably get_loss only
        # consumes the labelled positions; confirm.
        fun(row_idxs, np.hstack(true_labels[:sequence_len]))

        r.append(np.allclose(qW.to_host(), row_slicing_layer.W.get_value(), atol=1e-5))

    self.assertEqual(sum(r), len(r))
def test_theano_bprop_vector(self):
    """
    compare the `W` obtained after one forward/backward pass through the
    gpu `RowSlicingBlock` + `SoftmaxCeBlock` with the `W` obtained from
    the theano `RowSlicingLayer` + `SoftmaxLayer` loss gradient, for a
    single column of row indexes
    """
    checks = []
    for _ in xrange(self.N):
        n_embeddings = self.rng.random_integers(10000)
        n_rows, n_features = self.rng.random_integers(2000, size=2)
        weights = self.get_orthogonal_matrix(n_embeddings, n_features)
        indexes = self.rng.randint(n_embeddings, size=(n_rows, 1)).astype(np.int32)
        labels = self.rng.randint(n_features, size=(n_rows, 1)).astype(np.int32)
        w_device_id = 0

        # quagga side
        quagga.processor_type = 'gpu'
        idx_connector = Connector(Matrix.from_npa(indexes))
        w_connector = Connector(Matrix.from_npa(weights), w_device_id)
        label_connector = Connector(Matrix.from_npa(labels))
        slicing_block = RowSlicingBlock(w_connector, idx_connector)
        loss_block = SoftmaxCeBlock(slicing_block.output, label_connector)

        # forward pass
        label_connector.fprop()
        w_connector.fprop()
        idx_connector.fprop()
        slicing_block.fprop()
        loss_block.fprop()

        # backward pass, then fold the backward matrix into W
        loss_block.bprop()
        slicing_block.bprop()
        w_connector.add(Context(), w_connector.backward_matrix)

        # theano side: apply the same W + dL/dW update
        idx_var = T.ivector()
        label_var = T.ivector()
        layer = RowSlicingLayer(weights)
        output_expr = layer.get_output_expr(idx_var)
        loss_expr = SoftmaxLayer.get_loss(output_expr, label_var)
        w_grad = T.grad(loss_expr, layer.W)
        apply_update = theano.function([idx_var, label_var],
                                       updates=[(layer.W, layer.W + w_grad)])
        # The theano graph takes vectors, so feed column 0 of each array.
        apply_update(indexes[:, 0], labels[:, 0])

        checks.append(np.allclose(w_connector.to_host(), layer.W.get_value()))

    self.assertEqual(sum(checks), len(checks))
def test_bprop_matrix(self):
    """
    check that, after one forward/backward pass through `RowSlicingBlock`
    and a sequenced `SoftmaxCeBlock`, adding the backward matrix to `W`
    gives the same result on the gpu and cpu backends
    """
    checks = []
    for trial in xrange(self.N):
        max_len = self.rng.random_integers(500)
        if trial == 0:
            # The very first trial uses every column of the index matrix.
            seq_len = max_len
        else:
            seq_len = self.rng.random_integers(max_len)
        n_embeddings = self.rng.random_integers(10000)
        n_rows = self.rng.random_integers(500)
        n_features = self.rng.random_integers(2000)
        weights = self.get_orthogonal_matrix(n_embeddings, n_features)
        indexes = self.rng.randint(n_embeddings, size=(n_rows, max_len)).astype(np.int32)
        # One (n_rows, 1) label column per position of the index matrix.
        labels = [self.rng.randint(n_features, size=(n_rows, 1)).astype(np.int32)
                  for _ in xrange(max_len)]
        w_device_id = 0

        updated_w = {}
        for backend in ['gpu', 'cpu']:
            quagga.processor_type = backend
            idx_connector = Connector(Matrix.from_npa(indexes))
            label_connectors = [Connector(Matrix.from_npa(column)) for column in labels]
            label_list = List(label_connectors, idx_connector.ncols)
            w_connector = Connector(Matrix.from_npa(weights), w_device_id)
            slicing_block = RowSlicingBlock(w_connector, idx_connector)
            loss_block = SequencerBlock(block_class=SoftmaxCeBlock,
                                        params=[],
                                        sequences=[slicing_block.output, label_list])

            # forward pass over the first `seq_len` columns only
            w_connector.fprop()
            idx_connector.ncols = seq_len
            idx_connector.fprop()
            slicing_block.fprop()
            loss_block.fprop()

            # backward pass, then fold the backward matrix into W
            loss_block.bprop()
            slicing_block.bprop()
            w_connector.add(Context(), w_connector.backward_matrix)
            updated_w[backend] = w_connector.to_host()

        checks.append(np.allclose(updated_w['gpu'], updated_w['cpu']))

    self.assertEqual(sum(checks), len(checks))
0 }, sce_dot_block_W={ 'init': Orthogonal(1024, len(vocab)), 'device_id': 0 }, sce_dot_block_b={ 'init': Constant(1, len(vocab)), 'device_id': 0 }) data_block = PtbMiniBatchesGenerator(ptb_train, ptb_valid, batch_size=64, sentence_max_len=100, device_id=0) seq_embd_block = RowSlicingBlock(p['embd_W'], data_block.sentence_batch) # remove last in the list output = List(seq_embd_block.output[:-1], seq_embd_block.output.length - 1) c_fwd_repeat_block = RepeatBlock(p['lstm_fwd_c0'], data_block.sentence_batch.nrows, axis=0, device_id=0) h_fwd_repeat_block = RepeatBlock(p['lstm_fwd_h0'], data_block.sentence_batch.nrows, axis=0, device_id=0) fwd_lstm_block = SequencerBlock( block_class=LstmBlock, params=[p['lstm_fwd_W'], p['lstm_fwd_R'], 0.5], sequences=[output, data_block.mask], output_names=['h'],
'device_id': 1 }, ff_lstm_R={ 'init': lambda: ff_lstm_R, 'device_id': 1 }, sce_dot_block_W={ 'init': lambda: dot_block_W, 'device_id': 1 }, sce_dot_block_b={ 'init': lambda: dot_block_b, 'device_id': 1 }) data_block = DataBlock(char_to_idx, device_id=1) embd_block = RowSlicingBlock(W=p['embd_W'], row_indexes=data_block.char_idx) f_lstm_rnn_block = LstmBlock(p['f_lstm_W'], p['f_lstm_R'], None, embd_block.output, None, p['f_lstm_c0'], p['f_lstm_h0'], device_id=1) s_lstm_rnn_block = LstmBlock(p['s_lstm_W'], p['s_lstm_R'], None, f_lstm_rnn_block.h, None, p['s_lstm_c0'], p['s_lstm_h0'],
enc_lstm_W={'init': H5pyInitializer(model_file_name, 'enc_lstm_W'), 'device_id': 0}, enc_lstm_R={'init': H5pyInitializer(model_file_name, 'enc_lstm_R'), 'device_id': 0}, dec_lstm_c0={'init': H5pyInitializer(model_file_name, 'dec_lstm_c0'), 'device_id': 0}, dec_lstm_W={'init': H5pyInitializer(model_file_name, 'dec_lstm_W'), 'device_id': 0}, dec_lstm_R={'init': H5pyInitializer(model_file_name, 'dec_lstm_R'), 'device_id': 0}, sce_dot_block_W={'init': H5pyInitializer(model_file_name, 'sce_dot_block_W'), 'device_id': 0}, sce_dot_block_b={'init': H5pyInitializer(model_file_name, 'sce_dot_block_b'), 'device_id': 0}) data_block = DataBlock(train_data, valid_data, 64, word_dropout_prob=0.99, device_id=0) enc_embd_block = RowSlicingBlock(p['embd_W'], data_block.enc_x) enc_c_repeat_block = RepeatBlock(p['enc_lstm_c0'], data_block.enc_x.nrows, axis=0, device_id=0) enc_h_repeat_block = RepeatBlock(p['enc_lstm_h0'], data_block.enc_x.nrows, axis=0, device_id=0) enc_lstm_block = SequencerBlock(block_class=LstmBlock, params=[p['enc_lstm_W'], p['enc_lstm_R'], 0.25], sequences=[enc_embd_block.output, data_block.enc_mask], output_names=['h'], prev_names=['c', 'h'], paddings=[enc_c_repeat_block.output, enc_h_repeat_block.output], reverse=False, device_id=0) dec_embd_block = RowSlicingBlock(p['embd_W'], data_block.dec_x) dec_c_repeat_block = RepeatBlock(p['dec_lstm_c0'], data_block.enc_x.nrows, axis=0, device_id=0) last_selector_block = LastSelectorBlock(enc_lstm_block.h) l2_reg_block = L2RegularizationBlock(last_selector_block.output, 0.001) dec_lstm_block = SequencerBlock(block_class=LstmBlock,
'trainable': False }, enc_lstm_W={ 'init': H5pyInitializer(model_file_name, 'enc_lstm_W'), 'device_id': 1, 'trainable': False }, enc_lstm_R={ 'init': H5pyInitializer(model_file_name, 'enc_lstm_R'), 'device_id': 1, 'trainable': False }) data_block = DataBlock(word_to_idx, device_id=1) enc_embd_block = RowSlicingBlock(p['embd_W'], data_block.word_idx) enc_lstm_block = LstmBlock(p['enc_lstm_W'], p['enc_lstm_R'], None, enc_embd_block.output, None, p['enc_lstm_c0'], p['enc_lstm_h0'], device_id=1) encoder_model = Model([p, data_block, enc_embd_block, enc_lstm_block]) def encoder_step(word, begin=False): data_block.word = word if begin: enc_lstm_block.prev_c.assign_npa(enc_lstm_block.f_context, enc_lstm_c0)