def test_fprop(self):
    """Compare `fprop` results for cpu and gpu backends.

    Runs `self.N` randomized trials.  Each trial builds the same
    SequencerBlock(DotBlock) graph on both backends (with and without a
    bias parameter, forward and reversed) and records whether every
    per-step output matches within an absolute tolerance of 1e-5.
    """
    outcomes = []
    for trial in xrange(self.N):
        # Random problem size for this trial: the first trial uses the
        # full sequence length, later trials truncate it randomly.
        max_input_sequence_len = self.rng.random_integers(500)
        if trial == 0:
            sequence_len = max_input_sequence_len
        else:
            sequence_len = self.rng.random_integers(max_input_sequence_len)
        batch_size = self.rng.random_integers(256)
        input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
        inputs = [self.rng.randn(batch_size, input_dim).astype(np.float32)
                  for _ in xrange(max_input_sequence_len)]
        weights = self.get_orthogonal_matrix(input_dim, hidden_dim)
        bias = self.rng.rand(1, hidden_dim).astype(np.float32)
        from quagga.cuda import cudart
        cudart.cuda_set_device(1)
        qoutput = {}
        for reverse in [False, True]:
            for with_bias in [False, True]:
                for processor_type in ['gpu', 'cpu']:
                    quagga.processor_type = processor_type
                    qx = List([Connector(Matrix.from_npa(e)) for e in inputs])
                    qW = Connector(Matrix.from_npa(weights))
                    if with_bias:
                        qb = Connector(Matrix.from_npa(bias))
                    else:
                        qb = None
                    seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                   params=[qW, qb],
                                                   sequences=[qx],
                                                   output_names=['output'],
                                                   reverse=reverse)
                    qx.length = sequence_len
                    qx.fprop()
                    qW.fprop()
                    if qb:
                        qb.fprop()
                    seq_dot_block.fprop()
                    qoutput[processor_type] = seq_dot_block.output.to_host()
                # Compare the two backends step by step.
                mismatch = False
                for out_gpu, out_cpu in izip(qoutput['gpu'], qoutput['cpu']):
                    if not np.allclose(out_gpu, out_cpu, atol=1e-5):
                        mismatch = True
                        break
                outcomes.append(not mismatch)
    self.assertEqual(sum(outcomes), len(outcomes))
def test_fprop(self):
    """Compare `fprop` results for cpu and gpu backends.

    Performs `self.N` randomized trials; a trial passes when the gpu and
    cpu outputs of the same SequencerBlock(DotBlock) agree element-wise
    (atol=1e-5) for every backend configuration tried.
    """
    r = []
    for i in xrange(self.N):
        max_input_sequence_len = self.rng.random_integers(500)
        # First iteration exercises the untruncated sequence.
        sequence_len = (max_input_sequence_len if i == 0
                        else self.rng.random_integers(max_input_sequence_len))
        batch_size = self.rng.random_integers(256)
        input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
        seq_inputs = []
        for _ in xrange(max_input_sequence_len):
            seq_inputs.append(
                self.rng.randn(batch_size, input_dim).astype(np.float32))
        weight_matrix = self.get_orthogonal_matrix(input_dim, hidden_dim)
        bias_row = self.rng.rand(1, hidden_dim).astype(np.float32)
        from quagga.cuda import cudart
        cudart.cuda_set_device(1)
        qoutput = {}
        for reverse in [False, True]:
            for with_bias in [False, True]:
                for processor_type in ['gpu', 'cpu']:
                    quagga.processor_type = processor_type
                    qx = List([Connector(Matrix.from_npa(e))
                               for e in seq_inputs])
                    qW = Connector(Matrix.from_npa(weight_matrix))
                    qb = (Connector(Matrix.from_npa(bias_row))
                          if with_bias else None)
                    seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                   params=[qW, qb],
                                                   sequences=[qx],
                                                   output_names=['output'],
                                                   reverse=reverse)
                    qx.length = sequence_len
                    qx.fprop()
                    qW.fprop()
                    if qb:
                        qb.fprop()
                    seq_dot_block.fprop()
                    qoutput[processor_type] = seq_dot_block.output.to_host()
                # All per-step outputs must agree between the backends.
                close = all(np.allclose(g, c, atol=1e-5)
                            for g, c in izip(qoutput['gpu'], qoutput['cpu']))
                r.append(close)
    self.assertEqual(sum(r), len(r))
# Training-harness wiring: build the loggers/trackers, split the parameters
# between two update rules, attach everything to the optimizer as observers,
# then run the optimization loop.
logger = get_logger('train.log')
momentum_policy = FixedValuePolicy(0.95)
# NOTE(review): the integer arguments (100, 500, 5000) are presumably
# per-iteration periods for logging/validation/saving — confirm against
# the tracker/saver constructors.
train_loss_tracker = TrainLossTracker(model, 100, logger)
valid_tracker = ValidTracker(model, 500, logger)
loss_tracker = LossForValidTracker(logger)
valid_tracker.add_observer(loss_tracker)
saver = Hdf5Saver(p.trainable_parameters, 5000, 'ptb_parameters.hdf5', logger)
trainable_parameters = dict(p.trainable_parameters)
# The embedding matrix gets a sparse SGD update; every other parameter is
# updated by NagStep (Nesterov-style momentum, judging by the name) with the
# same fixed learning rate.
sparse_sgd_step = SparseSgdStep([trainable_parameters['embd_W']], FixedValuePolicy(0.01))
del trainable_parameters['embd_W']
nag_step = NagStep(trainable_parameters.values(), FixedValuePolicy(0.01), momentum_policy)
# nag_step = SgdStep(trainable_parameters.values(), learning_rate_policy)
# NOTE(review): presumably makes the data block wait on both update steps
# before producing the next batch — confirm blocking_contexts semantics.
data_block.blocking_contexts = nag_step.blocking_contexts + sparse_sgd_step.blocking_contexts
criterion = MaxIterCriterion(20000)
optimizer = Optimizer(criterion, model)
optimizer.add_observer(sparse_sgd_step)
optimizer.add_observer(nag_step)
optimizer.add_observer(train_loss_tracker)
optimizer.add_observer(valid_tracker)
optimizer.add_observer(saver)
optimizer.add_observer(criterion)
optimizer.optimize()
# Synchronize every CUDA device so all queued work completes before exit.
for device_id in xrange(cudart.cuda_get_device_count()):
    cudart.cuda_set_device(device_id)
    cudart.cuda_device_synchronize()
def activate(self):
    """Make this context's device the current CUDA device."""
    device = self.device_id
    cudart.cuda_set_device(device)
# NOTE(review): the next line closes a call/list whose opening bracket lies
# outside this chunk — presumably the list of blocks composing the model
# graph (bidirectional LSTM + dot + softmax-cross-entropy, judging by the
# names). Confirm against the preceding lines of the original file.
c_fwd_repeat_block, h_fwd_repeat_block, fwd_lstm_block, c_bwd_repeat_block, h_bwd_repeat_block, bwd_lstm_block, seq_hstack, seq_dot_block, seq_sce_block])
# Training-harness wiring (same pattern as elsewhere in this file):
# trackers/saver observe the optimizer; parameters are split between a
# sparse SGD update (embeddings) and a momentum update (everything else).
logger = get_logger('train.log')
momentum_policy = FixedValuePolicy(0.95)
# NOTE(review): 100/500/5000 are presumably iteration periods — confirm.
train_loss_tracker = TrainLossTracker(model, 100, logger)
valid_tracker = ValidTracker(model, 500, logger)
loss_tracker = LossForValidTracker(logger)
valid_tracker.add_observer(loss_tracker)
saver = Hdf5Saver(p.trainable_parameters, 5000, 'ptb_parameters.hdf5', logger)
trainable_parameters = dict(p.trainable_parameters)
sparse_sgd_step = SparseSgdStep([trainable_parameters['embd_W']], FixedValuePolicy(0.01))
del trainable_parameters['embd_W']
nag_step = NagStep(trainable_parameters.values(), FixedValuePolicy(0.01), momentum_policy)
# nag_step = SgdStep(trainable_parameters.values(), learning_rate_policy)
# NOTE(review): presumably makes the data block wait on both update steps
# before producing the next batch — confirm blocking_contexts semantics.
data_block.blocking_contexts = nag_step.blocking_contexts + sparse_sgd_step.blocking_contexts
criterion = MaxIterCriterion(20000)
optimizer = Optimizer(criterion, model)
optimizer.add_observer(sparse_sgd_step)
optimizer.add_observer(nag_step)
optimizer.add_observer(train_loss_tracker)
optimizer.add_observer(valid_tracker)
optimizer.add_observer(saver)
optimizer.add_observer(criterion)
optimizer.optimize()
# Synchronize every CUDA device so all queued work completes before exit.
for device_id in xrange(cudart.cuda_get_device_count()):
    cudart.cuda_set_device(device_id)
    cudart.cuda_device_synchronize()