def test_fprop(self):
    """Compare RepeatBlock forward output between the 'gpu' and 'cpu' processors.

    Runs ``self.N`` randomized trials. Each trial draws a repeat count, an
    axis and two matrix dimensions from ``self.rng``, runs the forward pass
    once per processor type, and records whether the two host-side outputs
    agree under ``np.allclose``. The test passes only if every trial agrees.
    """
    matches = []
    for _ in xrange(self.N):
        # The draw order is kept fixed so a seeded ``self.rng`` reproduces
        # the same sequence of cases.
        repeats = self.rng.random_integers(42)
        axis = self.rng.randint(2)
        n_in, n_out = self.rng.random_integers(2000, size=2)
        x = self.get_normal_matrix(n_in, n_out)

        host_outputs = {}
        for backend in ('gpu', 'cpu'):
            # The processor type is a module-level switch on quagga; set it
            # before any quagga object is constructed for this run.
            quagga.processor_type = backend
            x_connector = Connector(Matrix.from_npa(x))
            block = RepeatBlock(x_connector, repeats, axis)
            x_connector.fprop()
            block.fprop()
            host_outputs[backend] = block.output.to_host()

        matches.append(np.allclose(host_outputs['gpu'], host_outputs['cpu']))

    # Every trial must have matched.
    self.assertEqual(sum(matches), len(matches))
def test_bprop(self):
    """Compare RepeatBlock backward output between the 'gpu' and 'cpu' processors.

    Runs ``self.N`` randomized trials. In each trial a RepeatBlock feeds a
    SoftmaxCeBlock; after the forward and backward passes, the gradient
    stored in the input connector's ``backward_matrix`` is copied to the host
    for each processor type and the two are compared with ``np.allclose``.
    The test passes only if every trial agrees.
    """
    matches = []
    for _ in xrange(self.N):
        # The draw order is kept fixed so a seeded ``self.rng`` reproduces
        # the same sequence of cases.
        repeats = self.rng.random_integers(42)
        axis = self.rng.randint(2)
        n_in, n_out = self.rng.random_integers(2000, size=2)
        x = self.get_normal_matrix(n_in, n_out)

        # One label per row of the repeated output: the row count is
        # multiplied by ``repeats`` only when repeating along axis 0.
        if axis:
            n_label_rows = n_in
        else:
            n_label_rows = n_in * repeats
        true_labels = self.rng.randint(n_out, size=(n_label_rows, 1)).astype(np.int32)
        device_id = 0

        host_grads = {}
        for backend in ('gpu', 'cpu'):
            quagga.processor_type = backend
            # A device id is passed for the input connector, whose
            # ``backward_matrix`` is read after the backward pass.
            x_connector = Connector(Matrix.from_npa(x), device_id)
            labels_connector = Connector(Matrix.from_npa(true_labels))
            repeat_block = RepeatBlock(x_connector, repeats, axis)
            sce_block = SoftmaxCeBlock(repeat_block.output, labels_connector)

            # Forward pass, inputs first.
            x_connector.fprop()
            labels_connector.fprop()
            repeat_block.fprop()
            sce_block.fprop()

            # Backward pass in reverse order of the forward pass.
            sce_block.bprop()
            repeat_block.bprop()
            host_grads[backend] = x_connector.backward_matrix.to_host()

        matches.append(np.allclose(host_grads['gpu'], host_grads['cpu']))

    # Every trial must have matched.
    self.assertEqual(sum(matches), len(matches))
def test_theano_fprop(self):
    """Check the RepeatBlock forward output on 'gpu' against Theano's ``T.tile``.

    Runs ``self.N`` randomized trials. Each trial computes the block output
    with quagga and the reference output with a compiled Theano function
    that tiles the same input ``repeats`` times along ``axis``, then compares
    them with ``np.allclose``. The test passes only if every trial agrees.
    """
    matches = []
    for _ in xrange(self.N):
        # The draw order is kept fixed so a seeded ``self.rng`` reproduces
        # the same sequence of cases.
        repeats = self.rng.random_integers(42)
        axis = self.rng.randint(2)
        n_in, n_out = self.rng.random_integers(2000, size=2)
        x = self.get_normal_matrix(n_in, n_out)

        # quagga side.
        quagga.processor_type = 'gpu'
        x_connector = Connector(Matrix.from_npa(x))
        block = RepeatBlock(x_connector, repeats, axis)
        x_connector.fprop()
        block.fprop()
        quagga_result = block.output.to_host()

        # Theano reference: tile ``repeats`` times along ``axis`` and once
        # along the other axis.
        th_x = T.fmatrix()
        tile_reps = [repeats if k == axis else 1 for k in (0, 1)]
        tile_fn = theano.function([th_x], T.tile(th_x, tile_reps))
        theano_result = tile_fn(x)

        matches.append(np.allclose(quagga_result, theano_result))

    # Every trial must have matched.
    self.assertEqual(sum(matches), len(matches))
def test_theano_bprop(self):
    """Check the RepeatBlock input gradient on 'gpu' against a Theano gradient.

    Runs ``self.N`` randomized trials. In each trial quagga runs a
    RepeatBlock followed by a SoftmaxCeBlock forward and backward, and the
    gradient is read from the input connector's ``backward_matrix``. The
    reference is ``T.grad`` of the mean categorical cross-entropy over the
    softmax of the tiled input, taken with respect to the input. The two are
    compared with ``np.allclose``; the test passes only if every trial agrees.
    """
    matches = []
    for _ in xrange(self.N):
        # The draw order is kept fixed so a seeded ``self.rng`` reproduces
        # the same sequence of cases.
        repeats = self.rng.random_integers(42)
        axis = self.rng.randint(2)
        n_in, n_out = self.rng.random_integers(2000, size=2)
        x = self.get_normal_matrix(n_in, n_out)

        # One label per row of the repeated output: the row count is
        # multiplied by ``repeats`` only when repeating along axis 0.
        if axis:
            n_label_rows = n_in
        else:
            n_label_rows = n_in * repeats
        true_labels = self.rng.randint(n_out, size=(n_label_rows, 1)).astype(np.int32)
        device_id = 0

        # quagga side.
        quagga.processor_type = 'gpu'
        x_connector = Connector(Matrix.from_npa(x), device_id)
        labels_connector = Connector(Matrix.from_npa(true_labels))
        repeat_block = RepeatBlock(x_connector, repeats, axis)
        sce_block = SoftmaxCeBlock(repeat_block.output, labels_connector)

        # Forward pass, inputs first.
        x_connector.fprop()
        labels_connector.fprop()
        repeat_block.fprop()
        sce_block.fprop()

        # Backward pass in reverse order of the forward pass.
        sce_block.bprop()
        repeat_block.bprop()
        quagga_grad = x_connector.backward_matrix.to_host()

        # Theano reference graph: tile -> softmax -> mean cross-entropy.
        th_x = T.fmatrix()
        th_true_labels = T.ivector()
        tile_reps = [repeats if k == axis else 1 for k in (0, 1)]
        th_tiled = T.tile(th_x, tile_reps)
        th_probs = T.nnet.softmax(th_tiled)
        th_loss = T.mean(T.nnet.categorical_crossentropy(th_probs, th_true_labels))
        grad_fn = theano.function([th_x, th_true_labels], T.grad(th_loss, th_x))

        # The labels are built as a single column; pass that column as a
        # vector to match the ``T.ivector`` input.
        theano_grad = grad_fn(x, true_labels[:, 0])

        matches.append(np.allclose(quagga_grad, theano_grad))

    # Every trial must have matched.
    self.assertEqual(sum(matches), len(matches))
'device_id': 0 }, sce_dot_block_b={ 'init': Constant(1, len(vocab)), 'device_id': 0 }) data_block = PtbMiniBatchesGenerator(ptb_train, ptb_valid, batch_size=64, sentence_max_len=100, device_id=0) seq_embd_block = RowSlicingBlock(p['embd_W'], data_block.sentence_batch) # remove last in the list output = List(seq_embd_block.output[:-1], seq_embd_block.output.length - 1) c_fwd_repeat_block = RepeatBlock(p['lstm_fwd_c0'], data_block.sentence_batch.nrows, axis=0, device_id=0) h_fwd_repeat_block = RepeatBlock(p['lstm_fwd_h0'], data_block.sentence_batch.nrows, axis=0, device_id=0) fwd_lstm_block = SequencerBlock( block_class=LstmBlock, params=[p['lstm_fwd_W'], p['lstm_fwd_R'], 0.5], sequences=[output, data_block.mask], output_names=['h'], prev_names=['c', 'h'], paddings=[c_fwd_repeat_block.output, h_fwd_repeat_block.output], reverse=False, device_id=0) # remove first in the list
'device_id': 0}, enc_lstm_R={'init': H5pyInitializer(model_file_name, 'enc_lstm_R'), 'device_id': 0}, dec_lstm_c0={'init': H5pyInitializer(model_file_name, 'dec_lstm_c0'), 'device_id': 0}, dec_lstm_W={'init': H5pyInitializer(model_file_name, 'dec_lstm_W'), 'device_id': 0}, dec_lstm_R={'init': H5pyInitializer(model_file_name, 'dec_lstm_R'), 'device_id': 0}, sce_dot_block_W={'init': H5pyInitializer(model_file_name, 'sce_dot_block_W'), 'device_id': 0}, sce_dot_block_b={'init': H5pyInitializer(model_file_name, 'sce_dot_block_b'), 'device_id': 0}) data_block = DataBlock(train_data, valid_data, 64, word_dropout_prob=0.99, device_id=0) enc_embd_block = RowSlicingBlock(p['embd_W'], data_block.enc_x) enc_c_repeat_block = RepeatBlock(p['enc_lstm_c0'], data_block.enc_x.nrows, axis=0, device_id=0) enc_h_repeat_block = RepeatBlock(p['enc_lstm_h0'], data_block.enc_x.nrows, axis=0, device_id=0) enc_lstm_block = SequencerBlock(block_class=LstmBlock, params=[p['enc_lstm_W'], p['enc_lstm_R'], 0.25], sequences=[enc_embd_block.output, data_block.enc_mask], output_names=['h'], prev_names=['c', 'h'], paddings=[enc_c_repeat_block.output, enc_h_repeat_block.output], reverse=False, device_id=0) dec_embd_block = RowSlicingBlock(p['embd_W'], data_block.dec_x) dec_c_repeat_block = RepeatBlock(p['dec_lstm_c0'], data_block.enc_x.nrows, axis=0, device_id=0) last_selector_block = LastSelectorBlock(enc_lstm_block.h) l2_reg_block = L2RegularizationBlock(last_selector_block.output, 0.001) dec_lstm_block = SequencerBlock(block_class=LstmBlock, params=[p['dec_lstm_W'], p['dec_lstm_R'], 0.25],
# t_lstm_W={'init': get_stacked_orth_R, # 'device_id': 0}, # t_lstm_R={'init': get_stacked_orth_R, # 'device_id': 0}, # sce_dot_block_W={'init': Orthogonal(1024, len(idx_to_char)), # 'device_id': 0}, # sce_dot_block_b={'init': Constant(1, len(idx_to_char)), # 'device_id': 0}) data_block = DataBlock(char_data, char_to_idx, 50, x_device_id=1, y_device_id=0) embd_block = RowSlicingBlock(W=p['embd_W'], row_indexes=data_block.x) f_c_repeat_block = RepeatBlock(p['f_lstm_c0'], data_block.x.nrows, axis=0, device_id=1) f_h_repeat_block = RepeatBlock(p['f_lstm_h0'], data_block.x.nrows, axis=0, device_id=1) f_lstm_rnn_block = SequencerBlock( block_class=LstmBlock, params=[p['f_lstm_W'], p['f_lstm_R'], None], sequences=[embd_block.output, data_block.mask], output_names=['h'], prev_names=['c', 'h'], paddings=[f_c_repeat_block.output, f_h_repeat_block.output], reverse=False, device_id=1) s_c_repeat_block = RepeatBlock(p['s_lstm_c0'],