def __init__(self, data, char_to_idx, batch_size, x_device_id, y_device_id):
    """
    Allocate the buffers that hold one batch of index sequences.

    :param data: iterable of sized sequences; only ``len()`` of each element
                 is inspected here, the rest is left to
                 ``HomogeneousDataIterator``
    :param char_to_idx: forwarded unchanged to ``HomogeneousDataIterator``
    :param batch_size: number of rows of every allocated buffer
    :param x_device_id: device for ``x``, ``lengths`` and the mask
    :param y_device_id: device for the ``_y`` buffer
    """
    self.data = HomogeneousDataIterator(data, char_to_idx, batch_size, True, True)
    self.data_iterator = iter(self.data)
    self.x_context = Context(x_device_id)
    self.y_context = Context(y_device_id)

    # Longest sequence in `data`; 0 for empty data, as the manual scan gave.
    max_len = max([len(sub_line) for sub_line in data] or [0])
    # Single-argument form prints the same on Python 2 and Python 3.
    print(max_len)

    # Every buffer is one column narrower than the longest sequence.
    n_steps = max_len - 1
    self.x = Connector(Matrix.empty(batch_size, n_steps, 'int', x_device_id))
    self._y = Matrix.empty(batch_size, n_steps, 'int', y_device_id)
    self.y = List([Connector(self._y[:, i]) for i in xrange(n_steps)],
                  self.x.ncols)
    self.lengths = Matrix.empty(self.x.nrows, 1, 'int', x_device_id)
    self._mask = Matrix.empty(self.x.nrows, self.x.ncols, 'float', x_device_id)
    # `_mask` has `n_steps` columns, so slice exactly that many; iterating
    # up to `max_len` asked for a column past the end of the buffer.
    self.mask = List([Connector(self._mask[:, i]) for i in xrange(n_steps)],
                     self.x.ncols)
    self.blocking_contexts = None
def __init__(self, matrix, axis=1, device_id=None):
    """
    Allocate the buffers for a reduction of ``matrix`` along ``axis``.

    For ``axis == 0`` the output is a single row and ``alpha`` holds
    ``1 / matrix.nrows``; for ``axis == 1`` the output is a single column and
    ``alpha`` is ``None``.

    :param matrix: input operand; registered with this block's context
    :param axis: 0 or 1
    :param device_id: device used for the context and all buffers
    :raises ValueError: for any other ``axis``
    """
    self.context = Context(device_id)
    self._ctype = matrix.c_dtype
    self._zero = self._ctype(0.0)

    if axis == 0:
        ones_rows, ones_cols = 1, matrix.nrows
        out_rows, out_cols = 1, matrix.ncols
    elif axis == 1:
        ones_rows, ones_cols = matrix.ncols, 1
        out_rows, out_cols = matrix.nrows, 1
    else:
        raise ValueError('Invalid axis!')

    self._ones = Matrix.empty(ones_rows, ones_cols, matrix.dtype, device_id)
    self.output = Matrix.empty(out_rows, out_cols, matrix.dtype, device_id)
    self.alpha = self._ctype(1.0 / matrix.nrows) if axis == 0 else None
    self._ones.sync_fill(1.0)
    self.axis = axis

    ctx = self.context
    if not matrix.bpropagable:
        self.matrix = matrix.register_usage(ctx)
        self.output = Connector(self.output, ctx)
    else:
        # a bpropagable input also hands back its gradient buffer
        self.matrix, self.dL_dmatrix = matrix.register_usage(ctx, ctx)
        self.output = Connector(self.output, ctx, ctx)
def __init__(self, W, b, x, device_id=None):
    """
    Register the operands ``W``, ``b`` and ``x`` and allocate the output.

    A bpropagable operand is registered with two device ids and also yields
    its gradient buffer (``dL_dW`` / ``dL_db`` / ``dL_dx``); any other operand
    is registered with one device id only.

    :param W: operand whose ``ncols`` gives the output width
    :param b: optional operand; skipped entirely when falsy
    :param x: operand whose ``nrows`` gives the output height
    :param device_id: device passed to ``Context``; the id reported back by
                      the context is used for everything else
    """
    self.f_context = Context(device_id)
    device_id = self.f_context.device_id

    if not W.bpropagable:
        self.W = W.register_usage(device_id)
    else:
        self.W, self.dL_dW = W.register_usage(device_id, device_id)

    if b:
        if not b.bpropagable:
            self.b = b.register_usage(device_id)
        else:
            self.b, self.dL_db = b.register_usage(device_id, device_id)
            # column of ones, allocated only alongside the bias gradient
            self.ones = Matrix.empty(x.nrows, 1, self.b.dtype, device_id)
            self.ones.sync_fill(1.0)

    if not x.bpropagable:
        self.x = x.register_usage(device_id)
    else:
        self.x, self.dL_dx = x.register_usage(device_id, device_id)

    result = Matrix.empty(x.nrows, self.W.ncols, device_id=device_id)
    # learning is on as soon as any gradient buffer was handed out above
    gradient_names = ('dL_dW', 'dL_db', 'dL_dx')
    self.learning = any(hasattr(self, name) for name in gradient_names)
    if not self.learning:
        self.output = Connector(result)
    else:
        self.b_context = Context(device_id)
        self.output = Connector(result, device_id)
def __init__(self, x, repeats, axis=None, device_id=None):
    """
    Allocate an output ``repeats`` times larger than ``x`` along ``axis``.

    :param x: input operand; registered with this block's device
    :param repeats: multiplier applied to the row count (``axis == 0``) or
                    the column count (``axis == 1``)
    :param axis: 0 or 1
    :param device_id: device passed to ``Context``
    :raises ValueError: for any other ``axis`` (including the default)
    """
    self.context = Context(device_id)
    device_id = self.context.device_id
    self.repeats = repeats
    self.axis = axis

    learning = x.bpropagable
    if not learning:
        self.x = x.register_usage(device_id)
    else:
        self.x, self.dL_dx = x.register_usage(device_id, device_id)

    if axis == 0:
        buffer = Matrix.empty(x.nrows * repeats, x.ncols, x.dtype, device_id)
    elif axis == 1:
        buffer = Matrix.empty(x.nrows, x.ncols * repeats, x.dtype, device_id)
    else:
        raise ValueError('TODO')
    # the second Connector argument is the device id only when learning
    self.output = Connector(buffer, device_id if learning else None)
def __init__(self, train_data, valid_data, batch_size, word_dropout_prob, device_id):
    """
    Create the train/valid iterators and allocate the batch buffers.

    The first group of buffers (``enc_*``) is ``max_len`` columns wide and
    the second group (``dec_*``) is ``max_len + 1`` columns wide, where
    ``max_len`` is the longest line found in either data set.

    :param train_data: sized lines; iterated in random order, infinitely
    :param valid_data: sized lines; iterated with the iterator defaults
    :param batch_size: number of rows of every allocated buffer
    :param word_dropout_prob: stored as ``word_keep_prob = 1 - value``
    :param device_id: device used for the context and all buffers
    :raises ValueError: if both data sets are empty (``max`` of nothing)
    """
    self.train_data = HomogeneousDataIterator(train_data, batch_size,
                                              randomize=True, infinite=True)
    self.valid_data = HomogeneousDataIterator(valid_data, batch_size)
    self.train_data_iterator = iter(self.train_data)
    self.valid_data_iterator = iter(self.valid_data)
    self.word_keep_prob = 1.0 - word_dropout_prob
    self.rnd = RandomState(47571)
    # NOTE(review): `word_to_idx` is not a parameter of this method; it has
    # to be resolvable from an enclosing scope -- confirm this is intended.
    self.unk_idx = word_to_idx['<UNK>']
    self.context = Context(device_id)

    # Measure each line once and reuse the lengths for both the histogram
    # and the maximum (previously two separate passes over the data).
    line_lengths = [len(line) for line in chain(train_data, valid_data)]
    # Single-argument form prints the same on Python 2 and Python 3.
    print(Counter(line_lengths).most_common())
    max_len = max(line_lengths)
    dec_len = max_len + 1

    self.enc_x = Connector(Matrix.empty(batch_size, max_len, 'int', device_id))
    self.enc_lengths = Matrix.empty(self.enc_x.nrows, 1, 'int', device_id)
    self._enc_mask = Matrix.empty(self.enc_x.nrows, self.enc_x.ncols,
                                  'float', device_id)
    self.enc_mask = List([Connector(self._enc_mask[:, i])
                          for i in xrange(max_len)], self.enc_x.ncols)

    self.dec_x = Connector(Matrix.empty(batch_size, dec_len, 'int', device_id))
    self._dec_y = Matrix.empty(batch_size, dec_len, 'int', device_id)
    self.dec_y = List([Connector(self._dec_y[:, i])
                       for i in xrange(dec_len)], self._dec_y.ncols)
    self.dec_lengths = Matrix.empty(self.dec_x.nrows, 1, 'int', device_id)
    self._dec_mask = Matrix.empty(self.dec_x.nrows, self.dec_x.ncols,
                                  'float', device_id)
    self.dec_mask = List([Connector(self._dec_mask[:, i])
                          for i in xrange(dec_len)], self.dec_x.ncols)

    self.blocking_contexts = None
    self.training_mode = True
def __init__(self, x, axis, device_id=None):
    """
    Register ``x`` and allocate a one-column output with ``x.nrows`` rows.

    :param x: input operand
    :param axis: only ``1`` is supported
    :param device_id: device passed to ``Context``
    :raises NotImplementedError: when ``axis`` is not 1
    """
    if axis != 1:
        raise NotImplementedError
    self.axis = axis

    self.context = Context(device_id)
    device_id = self.context.device_id
    self.x = x.register_usage(device_id)

    column = Matrix.empty(x.nrows, 1, x.dtype, device_id)
    self.output = Connector(column)
def __init__(self, data, char_to_idx, batch_size, x_device_id, y_device_id):
    """
    Allocate the buffers that hold one batch of index sequences.

    :param data: iterable of sized sequences; only ``len()`` of each element
                 is inspected here, the rest is left to
                 ``HomogeneousDataIterator``
    :param char_to_idx: forwarded unchanged to ``HomogeneousDataIterator``
    :param batch_size: number of rows of every allocated buffer
    :param x_device_id: device for ``x``, ``lengths`` and the mask
    :param y_device_id: device for the ``_y`` buffer
    """
    self.data = HomogeneousDataIterator(data, char_to_idx, batch_size, True, True)
    self.data_iterator = iter(self.data)
    self.x_context = Context(x_device_id)
    self.y_context = Context(y_device_id)

    # Longest sequence in `data`; 0 for empty data, as the manual scan gave.
    max_len = max([len(sub_line) for sub_line in data] or [0])
    # Single-argument form prints the same on Python 2 and Python 3.
    print(max_len)

    # Every buffer is one column narrower than the longest sequence.
    n_steps = max_len - 1
    self.x = Connector(Matrix.empty(batch_size, n_steps, 'int', x_device_id))
    self._y = Matrix.empty(batch_size, n_steps, 'int', y_device_id)
    self.y = List([Connector(self._y[:, i]) for i in xrange(n_steps)],
                  self.x.ncols)
    self.lengths = Matrix.empty(self.x.nrows, 1, 'int', x_device_id)
    self._mask = Matrix.empty(self.x.nrows, self.x.ncols, 'float', x_device_id)
    # `_mask` has `n_steps` columns, so slice exactly that many; iterating
    # up to `max_len` asked for a column past the end of the buffer.
    self.mask = List([Connector(self._mask[:, i]) for i in xrange(n_steps)],
                     self.x.ncols)
    self.blocking_contexts = None
def __init__(self, x_sequence, y_sequence, device_id=None):
    """
    Validate two parallel sequences and allocate one output per step.

    Each output has ``x_sequence[0].nrows`` rows and as many columns as one
    ``x`` element plus one ``y`` element.

    :param x_sequence: sequence exposing ``length`` and ``register_usage``
    :param y_sequence: sequence exposing ``length`` and ``register_usage``
    :param device_id: device passed to ``Context``
    :raises ValueError: on mixed bpropagable state, or on a column count,
                        row count or dtype mismatch
    """
    # TODO add during hsplit otherwise wrong accumulation of gradients
    flags = [e.bpropagable for e in chain(x_sequence, y_sequence)]
    if all(flags):
        learning = True
    elif not any(flags):
        learning = False
    else:
        raise ValueError('All elements should be bpropagable or '
                         'non-bpropagable. Mixed state is not allowed!')

    x_ncols = x_sequence[0].ncols
    y_ncols = y_sequence[0].ncols
    dtype = x_sequence[0].dtype
    for x_elem, y_elem in izip(x_sequence, y_sequence):
        if x_elem.ncols != x_ncols or y_elem.ncols != y_ncols:
            raise ValueError(
                "All matrices in the sequence should have the same number of columns!")
        if x_elem.nrows != y_elem.nrows:
            raise ValueError(
                "Can't stack matrices in sequence with different number of rows!")
        if x_elem.dtype != dtype or y_elem.dtype != dtype:
            raise ValueError("Can't stack matrices with different dtypes!")

    self.context = Context(device_id)
    device_id = self.context.device_id

    if not learning:
        self.x_sequence = x_sequence.register_usage(device_id)
        self.y_sequence = y_sequence.register_usage(device_id)
    else:
        # registration yields pairs; izip(*pairs) separates the two halves
        x_pairs = x_sequence.register_usage(device_id, device_id)
        self.x_sequence, self.dL_dx_sequences = izip(*x_pairs)
        y_pairs = y_sequence.register_usage(device_id, device_id)
        self.y_sequence, self.dL_dy_sequences = izip(*y_pairs)
        self.dL_dx_sequences = List(self.dL_dx_sequences, x_sequence.length)
        self.dL_dy_sequences = List(self.dL_dy_sequences, y_sequence.length)
    self.x_sequence = List(self.x_sequence, x_sequence.length)
    self.y_sequence = List(self.y_sequence, y_sequence.length)

    out_ncols = x_ncols + y_ncols
    self.output = List(
        [Connector(Matrix.empty(x_sequence[0].nrows, out_ncols, dtype, device_id),
                   device_id)
         for _ in xrange(x_sequence.length)],
        x_sequence.length)

    if learning:
        # NOTE(review): both gradient lists are wrapped a second time here,
        # now both with x_sequence.length -- kept as is, confirm intent.
        self.dL_dx_sequences = List(self.dL_dx_sequences, x_sequence.length)
        self.dL_dy_sequences = List(self.dL_dy_sequences, x_sequence.length)
def __init__(self, W, col_indexes):
    """
    Register ``W`` and ``col_indexes`` on ``W``'s device and allocate an
    output of ``W.nrows`` rows by ``col_indexes.ncols`` columns.

    :param W: operand; when bpropagable it is registered through
              ``register_usage_with_sparse_backward_matrix``
    :param col_indexes: operand registered with ``W``'s device id
    """
    device_id = W.device_id
    self.context = Context(device_id)

    learning = W.bpropagable
    if not learning:
        self.W = W.register_usage(device_id)
    else:
        self.W, self.dL_dW = W.register_usage_with_sparse_backward_matrix()
    self.col_indexes = col_indexes.register_usage(device_id)

    buffer = Matrix.empty(W.nrows, col_indexes.ncols, device_id=device_id)
    bu_device_id = device_id if learning else None
    self.output = Connector(buffer, bu_device_id)
def __init__(self, R, b, grad_clipping, mask, prev_c, prev_h, device_id=None):
    """
    Register all operands and allocate the working buffers of the block.

    ``zifo`` is one ``batch_size x 4*dim`` buffer; ``z``, ``i``, ``f`` and
    ``o`` are its four consecutive ``dim``-wide column slices. The derivative
    buffers exist only when ``self.learning`` is set.

    :param R: operand whose ``nrows`` defines ``dim``
    :param b: operand registered like ``R``
    :param grad_clipping: stored unchanged
    :param mask: optional operand; registered only when truthy
    :param prev_c: operand whose ``nrows`` defines the batch size
    :param prev_h: operand registered like ``prev_c``
    :param device_id: device passed to ``Context``
    """
    self.f_context = Context(device_id)
    device_id = self.f_context.device_id

    if not R.bpropagable:
        self.R = R.register_usage(device_id)
    else:
        self.R, self.dL_dR = R.register_usage(device_id, device_id)
        self.R_b_context = Context(device_id)

    if not b.bpropagable:
        self.b = b.register_usage(device_id)
    else:
        self.b, self.dL_db = b.register_usage(device_id, device_id)
        self.b_b_context = Context(device_id)

    self.grad_clipping = grad_clipping
    if mask:
        self.mask = mask.register_usage(device_id)

    if not prev_c.bpropagable:
        self.prev_c = prev_c.register_usage(device_id)
    else:
        self.prev_c, self.dL_dprev_c = prev_c.register_usage(device_id, device_id)

    if not prev_h.bpropagable:
        self.prev_h = prev_h.register_usage(device_id)
    else:
        self.prev_h, self.dL_dprev_h = prev_h.register_usage(device_id, device_id)

    # note: `b.bpropagable` does not take part in this flag
    self.learning = R.bpropagable or prev_c.bpropagable or prev_h.bpropagable
    if self.learning:
        self.b_context = Context(device_id)
    bu_device_id = device_id if self.learning else None

    dim = self.R.nrows
    batch_size = self.prev_c.nrows

    def quarters(buffer):
        # four consecutive, equally wide column slices of `buffer`
        return [buffer[:, k * dim:(k + 1) * dim] for k in xrange(4)]

    self.zifo = Matrix.empty(batch_size, 4 * dim, device_id=device_id)
    self.z, self.i, self.f, self.o = quarters(self.zifo)
    self.c = Connector(Matrix.empty_like(self.prev_c, device_id), bu_device_id)
    self.tanh_c = Matrix.empty_like(self.c, device_id)
    self.h = Connector(Matrix.empty_like(self.c, device_id), bu_device_id)

    if self.learning:
        self._dzifo_dpre_zifo = Matrix.empty_like(self.zifo)
        (self.dz_dpre_z, self.di_dpre_i,
         self.df_dpre_f, self.do_dpre_o) = quarters(self._dzifo_dpre_zifo)
        # the dL_dpre_* attributes are aliases of the buffers above,
        # not separate allocations
        self.dL_dpre_zifo = self._dzifo_dpre_zifo
        self.dL_dpre_z = self.dz_dpre_z
        self.dL_dpre_i = self.di_dpre_i
        self.dL_dpre_f = self.df_dpre_f
        self.dL_dpre_o = self.do_dpre_o
        self._dtanh_c_dc = Matrix.empty_like(self.c)
def __init__(self, ptb_train, ptb_valid, batch_size, sentence_max_len, device_id):
    """
    Load both data sets onto the device and allocate the batch buffers.

    :param ptb_train: training data; generated in random order, infinitely
    :param ptb_valid: validation data; generated with the defaults
    :param batch_size: number of rows of the batch, length and mask buffers
    :param sentence_max_len: number of columns of the batch and mask buffers
    :param device_id: device passed to ``Context``
    """
    self.blocking_contexts = None
    self.context = Context(device_id)
    device_id = self.context.device_id

    self.train_offsets = HomogeneousDataGenerator(
        ptb_train, batch_size, sentence_max_len, randomize=True, infinite=True)
    self.valid_offsets = HomogeneousDataGenerator(
        ptb_valid, batch_size, sentence_max_len)

    # each flattened data set is wrapped in a list before conversion,
    # which gives the array one extra leading dimension
    train_sentences = np.array([self.train_offsets.flatten_sentences])
    valid_sentences = np.array([self.valid_offsets.flatten_sentences])
    self.train_sents = Matrix.from_npa(train_sentences, 'int', device_id)
    self.valid_sents = Matrix.from_npa(valid_sentences, 'int', device_id)

    host_lengths = np.empty((batch_size, 1), dtype=np.int32, order='F')
    self._sent_lengths = host_lengths[...]
    self.sent_lengths = Matrix.from_npa(self._sent_lengths, device_id=device_id)

    sentence_batch = Matrix.empty(batch_size, sentence_max_len, 'int', device_id)
    self.sentence_batch = Connector(sentence_batch, self.context)
    self.sentence_batch.sync_fill(0)

    self._mask = Matrix.empty(sentence_batch.nrows, self.sentence_batch.ncols,
                              'float', device_id)
    mask_columns = [Connector(self._mask[:, i]) for i in xrange(sentence_max_len)]
    self.mask = List(mask_columns, self.sentence_batch.ncols)

    self.train_offsets_iterator = iter(self.train_offsets)
    self.valid_offsets_iterator = iter(self.valid_offsets)
    self.training_mode = True
def __init__(self, matrix, axis=1, device_id=None):
    """
    Allocate the buffers for a reduction of ``matrix`` along ``axis``.

    For ``axis == 0`` the output is a single row and ``alpha`` holds
    ``1 / matrix.nrows``; for ``axis == 1`` the output is a single column and
    ``alpha`` is ``None``.

    :param matrix: input operand; registered with this block's context
    :param axis: 0 or 1
    :param device_id: device used for the context and all buffers
    :raises ValueError: for any other ``axis``
    """
    self.context = Context(device_id)
    self._ctype = matrix.c_dtype
    self._zero = self._ctype(0.0)

    if axis == 0:
        ones_rows, ones_cols = 1, matrix.nrows
        out_rows, out_cols = 1, matrix.ncols
    elif axis == 1:
        ones_rows, ones_cols = matrix.ncols, 1
        out_rows, out_cols = matrix.nrows, 1
    else:
        raise ValueError('Invalid axis!')

    self._ones = Matrix.empty(ones_rows, ones_cols, matrix.dtype, device_id)
    self.output = Matrix.empty(out_rows, out_cols, matrix.dtype, device_id)
    self.alpha = self._ctype(1.0 / matrix.nrows) if axis == 0 else None
    self._ones.sync_fill(1.0)
    self.axis = axis

    ctx = self.context
    if not matrix.bpropagable:
        self.matrix = matrix.register_usage(ctx)
        self.output = Connector(self.output, ctx)
    else:
        # a bpropagable input also hands back its gradient buffer
        self.matrix, self.dL_dmatrix = matrix.register_usage(ctx, ctx)
        self.output = Connector(self.output, ctx, ctx)
def __init__(self, W, row_indexes, dense=True):
    """
    Register ``W`` and ``row_indexes`` and allocate the output(s).

    With more than one index column the output is a ``List`` holding one
    connector per column; otherwise it is a single connector. Every output
    buffer has ``row_indexes.nrows`` rows and ``W.ncols`` columns.

    :param W: operand; its ``device_id`` is used for everything
    :param row_indexes: operand registered on the same device
    :param dense: when ``W`` is bpropagable, selects ``register_usage``
                  (True) or ``register_usage_with_sparse_backward_matrix``
    """
    self.dense = dense
    device_id = W.device_id
    self.context = Context(device_id)

    learning = W.bpropagable
    if not learning:
        self.W = W.register_usage(device_id)
    elif dense:
        self.W, self.dL_dW = W.register_usage(device_id, device_id)
    else:
        self.W, self.dL_dW = W.register_usage_with_sparse_backward_matrix()
    self.row_indexes = row_indexes.register_usage(device_id)

    def new_output():
        # one freshly allocated buffer wrapped in its own connector
        buffer = Matrix.empty(row_indexes.nrows, W.ncols, device_id=device_id)
        return Connector(buffer, device_id if learning else None)

    if row_indexes.ncols > 1:
        outputs = [new_output() for _ in xrange(row_indexes.ncols)]
        self.output = List(outputs, row_indexes.ncols)
    else:
        self.output = new_output()
def __init__(self, ptb_train, ptb_valid, batch_size, sentence_max_len, device_id):
    """
    Load both data sets onto the device and allocate the batch buffers.

    :param ptb_train: training data; generated in random order, infinitely
    :param ptb_valid: validation data; generated with the defaults
    :param batch_size: number of rows of the batch, length and mask buffers
    :param sentence_max_len: number of columns of the batch and mask buffers
    :param device_id: device passed to ``Context``
    """
    self.blocking_contexts = None
    self.context = Context(device_id)
    device_id = self.context.device_id

    self.train_offsets = HomogeneousDataGenerator(
        ptb_train, batch_size, sentence_max_len, randomize=True, infinite=True)
    self.valid_offsets = HomogeneousDataGenerator(
        ptb_valid, batch_size, sentence_max_len)

    # each flattened data set is wrapped in a list before conversion,
    # which gives the array one extra leading dimension
    train_sentences = np.array([self.train_offsets.flatten_sentences])
    valid_sentences = np.array([self.valid_offsets.flatten_sentences])
    self.train_sents = Matrix.from_npa(train_sentences, 'int', device_id)
    self.valid_sents = Matrix.from_npa(valid_sentences, 'int', device_id)

    host_lengths = np.empty((batch_size, 1), dtype=np.int32, order='F')
    self._sent_lengths = host_lengths[...]
    self.sent_lengths = Matrix.from_npa(self._sent_lengths, device_id=device_id)

    sentence_batch = Matrix.empty(batch_size, sentence_max_len, 'int', device_id)
    self.sentence_batch = Connector(sentence_batch, self.context)
    self.sentence_batch.sync_fill(0)

    self._mask = Matrix.empty(sentence_batch.nrows, self.sentence_batch.ncols,
                              'float', device_id)
    mask_columns = [Connector(self._mask[:, i]) for i in xrange(sentence_max_len)]
    self.mask = List(mask_columns, self.sentence_batch.ncols)

    self.train_offsets_iterator = iter(self.train_offsets)
    self.valid_offsets_iterator = iter(self.valid_offsets)
    self.training_mode = True
def __init__(self, train_x, train_y, valid_x, valid_y, batch_size, device_id):
    """
    Copy both data sets to the device and allocate the batch buffers.

    The feature arrays are stored transposed and cast to float32, so the
    column count of ``self.train_x`` / ``self.valid_x`` is what the index
    arrays range over. Label arrays get a trailing axis and are stored as
    'int'.

    :param train_x: numpy array of training features
    :param train_y: numpy array of training labels
    :param valid_x: numpy array of validation features
    :param valid_y: numpy array of validation labels
    :param batch_size: number of rows of the ``x``, ``y`` and index buffers
    :param device_id: device passed to ``Context``
    """
    self.context = Context(device_id)
    device_id = self.context.device_id

    def features_to_device(array):
        # transposed float32 copy on the device
        return Matrix.from_npa(array.T.astype(np.float32), device_id=device_id)

    def labels_to_device(array):
        # labels become a single 'int' column
        return Matrix.from_npa(array[:, np.newaxis], 'int', device_id=device_id)

    self.train_x = features_to_device(train_x)
    self.valid_x = features_to_device(valid_x)
    self.train_y = labels_to_device(train_y)
    self.valid_y = labels_to_device(valid_y)
    self.batch_size = batch_size

    x_buffer = Matrix.empty(self.batch_size, self.train_x.nrows, device_id=device_id)
    y_buffer = Matrix.empty(self.batch_size, 1, 'int', device_id)
    self.x = Connector(x_buffer)
    self.y = Connector(y_buffer)

    self.train_indices = np.arange(int(self.train_x.ncols), dtype=np.int32)
    self.valid_indices = np.arange(int(self.valid_x.ncols), dtype=np.int32)
    self.indices = Matrix.empty(self.batch_size, 1, 'int', device_id)

    # fixed seed; only the training indices are shuffled here
    self.rng = np.random.RandomState(42)
    self.rng.shuffle(self.train_indices)

    self.train_i = 0
    self.valid_i = 0
    self.training_mode = True
    self.blocking_contexts = None
def __init__(self, x_sequence, y_sequence, device_id=None):
    """
    Validate two parallel sequences and allocate one output per step.

    Each output has ``x_sequence[0].nrows`` rows and as many columns as one
    ``x`` element plus one ``y`` element.

    :param x_sequence: sequence exposing ``length`` and ``register_usage``
    :param y_sequence: sequence exposing ``length`` and ``register_usage``
    :param device_id: device passed to ``Context``
    :raises ValueError: on mixed bpropagable state, or on a column count,
                        row count or dtype mismatch
    """
    # TODO add during hsplit otherwise wrong accumulation of gradients
    flags = [e.bpropagable for e in chain(x_sequence, y_sequence)]
    if all(flags):
        learning = True
    elif not any(flags):
        learning = False
    else:
        raise ValueError('All elements should be bpropagable or '
                         'non-bpropagable. Mixed state is not allowed!')

    x_ncols = x_sequence[0].ncols
    y_ncols = y_sequence[0].ncols
    dtype = x_sequence[0].dtype
    for x_elem, y_elem in izip(x_sequence, y_sequence):
        if x_elem.ncols != x_ncols or y_elem.ncols != y_ncols:
            raise ValueError(
                "All matrices in the sequence should have the same number of columns!")
        if x_elem.nrows != y_elem.nrows:
            raise ValueError(
                "Can't stack matrices in sequence with different number of rows!")
        if x_elem.dtype != dtype or y_elem.dtype != dtype:
            raise ValueError("Can't stack matrices with different dtypes!")

    self.context = Context(device_id)
    device_id = self.context.device_id

    if not learning:
        self.x_sequence = x_sequence.register_usage(device_id)
        self.y_sequence = y_sequence.register_usage(device_id)
    else:
        # registration yields pairs; izip(*pairs) separates the two halves
        x_pairs = x_sequence.register_usage(device_id, device_id)
        self.x_sequence, self.dL_dx_sequences = izip(*x_pairs)
        y_pairs = y_sequence.register_usage(device_id, device_id)
        self.y_sequence, self.dL_dy_sequences = izip(*y_pairs)
        self.dL_dx_sequences = List(self.dL_dx_sequences, x_sequence.length)
        self.dL_dy_sequences = List(self.dL_dy_sequences, y_sequence.length)
    self.x_sequence = List(self.x_sequence, x_sequence.length)
    self.y_sequence = List(self.y_sequence, y_sequence.length)

    out_ncols = x_ncols + y_ncols
    self.output = List(
        [Connector(Matrix.empty(x_sequence[0].nrows, out_ncols, dtype, device_id),
                   device_id)
         for _ in xrange(x_sequence.length)],
        x_sequence.length)

    if learning:
        # NOTE(review): both gradient lists are wrapped a second time here,
        # now both with x_sequence.length -- kept as is, confirm intent.
        self.dL_dx_sequences = List(self.dL_dx_sequences, x_sequence.length)
        self.dL_dy_sequences = List(self.dL_dy_sequences, x_sequence.length)
def __init__(self, *matrices, **kwargs):
    """
    Register every input and allocate one output as wide as all of them.

    The output has ``matrices[0].nrows`` rows, the dtype of ``matrices[0]``
    and the summed column count of all inputs.

    :param matrices: input operands, at least one
    :param kwargs: only ``device_id`` is read (``None`` when absent)
    """
    # TODO(sergii): switch hsplit to an additive hsplit so that gradients
    # are accumulated properly
    self.context = Context(kwargs.get('device_id'))
    device_id = self.context.device_id

    self.matrices = []
    self.dL_dmatrices = []
    self.bpropagable = []
    for source in matrices:
        self.bpropagable.append(source.bpropagable)
        if source.bpropagable:
            registered, gradient = source.register_usage(device_id, device_id)
            self.dL_dmatrices.append(gradient)
        else:
            registered = source.register_usage(device_id)
        self.matrices.append(registered)

    # start the sum from the first width rather than from a plain 0
    widths = [source.ncols for source in matrices]
    total_ncols = sum(widths[1:], widths[0])
    first = matrices[0]
    # the output gets a second device id only if some input has a gradient
    bu_device_id = device_id if self.dL_dmatrices else None
    buffer = Matrix.empty(first.nrows, total_ncols, first.dtype, device_id)
    self.output = Connector(buffer, bu_device_id)
def __init__(self, matrices, u, mask=None, device_id=None):
    """
    Register ``matrices``, ``u`` and the optional ``mask`` and allocate the
    output plus the ``a`` buffers.

    The output is shaped like ``matrices[0]``; ``a`` has one row per row of
    ``matrices[0]`` and one column per element of ``matrices``.

    :param matrices: sequence exposing ``length`` and ``register_usage``
    :param u: operand registered on the same device
    :param mask: optional operand; registered only when truthy
    :param device_id: device passed to ``Context``
    """
    self.context = Context(device_id)
    device_id = self.context.device_id

    first = matrices[0]
    out_buffer = Matrix.empty_like(first, device_id)
    learning = first.bpropagable or u.bpropagable
    self.output = Connector(out_buffer, device_id if learning else None)

    if not first.bpropagable:
        self.matrices = matrices.register_usage(device_id)
    else:
        # registration yields pairs; izip(*pairs) separates the two halves
        registered = matrices.register_usage(device_id, device_id)
        self.matrices, self.dL_dmatrices = izip(*registered)
    self.length = matrices.length

    if not u.bpropagable:
        self.u = u.register_usage(device_id)
    else:
        self.u, self.dL_du = u.register_usage(device_id, device_id)

    if mask:
        self.mask = mask.register_usage(device_id)

    self.a = Matrix.empty(first.nrows, matrices.length, 'float', device_id)
    self.dL_dpre_a = Matrix.empty_like(self.a)
    # one column slice of `a` per registered matrix
    self.a_cols = [self.a[:, i] for i in xrange(len(self.matrices))]
def __init__(self, matrices, u, mask=None, device_id=None):
    """
    Register ``matrices``, ``u`` and the optional ``mask`` and allocate the
    output plus the ``a`` buffers.

    The output is shaped like ``matrices[0]``; ``a`` has one row per row of
    ``matrices[0]`` and one column per element of ``matrices``.

    :param matrices: sequence exposing ``length`` and ``register_usage``
    :param u: operand registered on the same device
    :param mask: optional operand; registered only when truthy
    :param device_id: device passed to ``Context``
    """
    self.context = Context(device_id)
    device_id = self.context.device_id

    first = matrices[0]
    out_buffer = Matrix.empty_like(first, device_id)
    learning = first.bpropagable or u.bpropagable
    self.output = Connector(out_buffer, device_id if learning else None)

    if not first.bpropagable:
        self.matrices = matrices.register_usage(device_id)
    else:
        # registration yields pairs; izip(*pairs) separates the two halves
        registered = matrices.register_usage(device_id, device_id)
        self.matrices, self.dL_dmatrices = izip(*registered)
    self.length = matrices.length

    if not u.bpropagable:
        self.u = u.register_usage(device_id)
    else:
        self.u, self.dL_du = u.register_usage(device_id, device_id)

    if mask:
        self.mask = mask.register_usage(device_id)

    self.a = Matrix.empty(first.nrows, matrices.length, 'float', device_id)
    self.dL_dpre_a = Matrix.empty_like(self.a)
    # one column slice of `a` per registered matrix
    self.a_cols = [self.a[:, i] for i in xrange(len(self.matrices))]
def __init__(self, word_to_idx, device_id):
    """
    Keep the vocabulary mapping and allocate a 1x1 'int' index buffer.

    :param word_to_idx: stored unchanged on the instance
    :param device_id: device passed to ``Context``; the id reported back by
                      the context is used for the buffer
    """
    self.word_to_idx = word_to_idx
    self.word = None
    self.context = Context(device_id)
    device_id = self.context.device_id
    index_cell = Matrix.empty(1, 1, 'int', device_id)
    self.word_idx = Connector(index_cell)
def test_bprop(self):
    """
    compare `bprop` results for cpu and gpu backends

    For `self.N` randomly sized problems and every combination of
    `reverse`, `with_mask` and `learn_inital_states`, the same three-block
    model is built once per processor type, run forward and backward, and
    the collected gradients are compared with `np.allclose(atol=1e-6)`.
    The test passes only if every comparison succeeded.
    """
    r = []
    for i in xrange(self.N):
        # the order of the rng draws below determines the generated data
        max_input_sequence_len = self.rng.random_integers(500)
        # first iteration uses the full length, later ones a random length
        sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
            max_input_sequence_len)
        batch_size = self.rng.random_integers(256)
        input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
        x = [
            self.rng.randn(batch_size, input_dim).astype(np.float32)
            for _ in xrange(max_input_sequence_len)
        ]
        true_labels = [
            self.rng.randint(2, size=(batch_size, 1)).astype(np.float32)
            for _ in xrange(max_input_sequence_len)
        ]
        mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
            np.float32)
        h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
        c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
        # four input and four recurrent weight blocks, stacked side by side
        W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W = np.hstack((W_z, W_i, W_f, W_o))
        R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R = np.hstack((R_z, R_i, R_f, R_o))
        lr_W = self.get_orthogonal_matrix(hidden_dim, 1)
        lr_b = self.rng.rand(1, 1).astype(dtype=np.float32)
        device_id = 0

        # gradients per processor type, overwritten for every configuration
        quagga_grads = {}
        for reverse in [False, True]:
            for with_mask in [False, True]:
                for learn_inital_states in [False, True]:
                    for processor_type in ['gpu', 'cpu']:
                        quagga.processor_type = processor_type
                        context = Context()
                        qx = List([
                            Connector(Matrix.from_npa(e), device_id) for e in x
                        ])
                        qtrue_labels = List([
                            Connector(Matrix.from_npa(e)) for e in true_labels
                        ], len(qx))
                        qmask = Matrix.empty(batch_size, len(qx))
                        # initial states get a device id only when learned
                        qh_0 = Connector(
                            Matrix.from_npa(h_0),
                            device_id if learn_inital_states else None)
                        qc_0 = Connector(
                            Matrix.from_npa(c_0),
                            device_id if learn_inital_states else None)
                        qW = Connector(Matrix.from_npa(W), device_id)
                        qR = Connector(Matrix.from_npa(R), device_id)
                        qlr_W = Connector(Matrix.from_npa(lr_W), device_id)
                        qlr_b = Connector(Matrix.from_npa(lr_b), device_id)

                        sequences = [qx]
                        if with_mask:
                            sequences.append(
                                List([
                                    Connector(qmask[:, i])
                                    for i in xrange(len(qx))
                                ], len(qx)))
                            qmask.assign_npa(context, mask)
                            # from here on `qmask` is the list of columns
                            qmask = sequences[-1]
                        else:
                            sequences.append([None] * len(qx))
                        lstm = SequencerBlock(block_class=LstmBlock,
                                              params=[qW, qR],
                                              sequences=sequences,
                                              output_names=['h'],
                                              prev_names=['c', 'h'],
                                              paddings=[qc_0, qh_0],
                                              reverse=reverse)
                        seq_dot_block = SequencerBlock(
                            block_class=DotBlock,
                            params=[qlr_W, qlr_b],
                            sequences=[lstm.h],
                            output_names=['output'])
                        seq_sce_block = SequencerBlock(
                            block_class=SigmoidCeBlock,
                            params=[],
                            sequences=[seq_dot_block.output, qtrue_labels
                                       ] + ([qmask] if with_mask else []))
                        # the length is set only after the blocks were built
                        qx.length = sequence_len
                        qx.fprop()
                        qtrue_labels.fprop()
                        if with_mask:
                            qmask.fprop()
                        qlr_W.fprop()
                        qlr_b.fprop()
                        qh_0.fprop()
                        qc_0.fprop()
                        qW.fprop()
                        qR.fprop()
                        lstm.fprop()
                        seq_dot_block.fprop()
                        seq_sce_block.fprop()
                        # backward pass in reverse block order
                        seq_sce_block.bprop()
                        seq_dot_block.bprop()
                        lstm.bprop()
                        quagga_grads[processor_type] = [
                            qlr_b.backward_matrix.to_host(),
                            qlr_W.backward_matrix.to_host(),
                            qW.backward_matrix.to_host(),
                            qR.backward_matrix.to_host()
                        ]
                        if learn_inital_states:
                            quagga_grads[processor_type].append(
                                qc_0.backward_matrix.to_host())
                            quagga_grads[processor_type].append(
                                qh_0.backward_matrix.to_host())
                        quagga_grads[processor_type].extend(
                            e.backward_matrix.to_host() for e in qx)

                    # one entry in `r` per compared gradient
                    for grad_gpu, grad_cpu in izip(quagga_grads['gpu'],
                                                   quagga_grads['cpu']):
                        r.append(np.allclose(grad_gpu, grad_cpu, atol=1e-6))

    # every recorded comparison must be True
    self.assertEqual(sum(r), len(r))
def __init__(self, char_to_idx, device_id):
    """
    Keep the character mapping and allocate a 1x1 'int' index buffer.

    :param char_to_idx: stored unchanged on the instance
    :param device_id: device passed to ``Context``; the id reported back by
                      the context is used for the buffer
    """
    self.char_to_idx = char_to_idx
    self.char = None
    self.context = Context(device_id)
    device_id = self.context.device_id
    index_cell = Matrix.empty(1, 1, 'int', device_id)
    self.char_idx = Connector(index_cell)
def __init__(self, char_to_idx, device_id):
    """
    Keep the character mapping and allocate a 1x1 'int' index buffer.

    :param char_to_idx: stored unchanged on the instance
    :param device_id: device passed to ``Context``; the id reported back by
                      the context is used for the buffer
    """
    self.char_to_idx = char_to_idx
    self.char = None
    self.context = Context(device_id)
    device_id = self.context.device_id
    index_cell = Matrix.empty(1, 1, 'int', device_id)
    self.char_idx = Connector(index_cell)
def __init__(self, word_to_idx, device_id):
    """
    Keep the vocabulary mapping and allocate a 1x1 'int' index buffer.

    :param word_to_idx: stored unchanged on the instance
    :param device_id: device passed to ``Context``; the id reported back by
                      the context is used for the buffer
    """
    self.word_to_idx = word_to_idx
    self.word = None
    self.context = Context(device_id)
    device_id = self.context.device_id
    index_cell = Matrix.empty(1, 1, 'int', device_id)
    self.word_idx = Connector(index_cell)
def test_theano_fprop(self):
    """
    Compare the forward output of the quagga model (processor type 'gpu')
    with the output of a theano `LstmLayer` built from the same weights.

    Runs `self.N` randomly sized problems, each for every combination of
    `reverse` and `with_mask`; one boolean per configuration is recorded in
    `r` and all of them have to be True.
    """
    quagga.processor_type = 'gpu'
    r = []
    for i in xrange(self.N):
        # the order of the rng draws below determines the generated data
        max_input_sequence_len = self.rng.random_integers(500)
        # first iteration uses the full length, later ones a random length
        sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
            max_input_sequence_len)
        batch_size = self.rng.random_integers(256)
        input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
        x = [
            self.rng.randn(batch_size, input_dim).astype(np.float32)
            for _ in xrange(max_input_sequence_len)
        ]
        mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
            np.float32)
        h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
        c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
        # four input and four recurrent weight blocks, stacked side by side
        W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W = np.hstack((W_z, W_i, W_f, W_o))
        R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R = np.hstack((R_z, R_i, R_f, R_o))

        for reverse in [False, True]:
            for with_mask in [False, True]:
                # quagga side
                context = Context()
                qx = List([Connector(Matrix.from_npa(e)) for e in x])
                qmask = Connector(
                    Matrix.empty(batch_size, len(qx), 'float'))
                qh_0 = Connector(Matrix.from_npa(h_0))
                qc_0 = Connector(Matrix.from_npa(c_0))
                qW = Connector(Matrix.from_npa(W))
                qR = Connector(Matrix.from_npa(R))
                lstm = SequencerBlock(block_class=LstmBlock,
                                      params=[qW, qR],
                                      sequences=[qx] + ([qmask] if with_mask else []),
                                      output_names=['h'],
                                      prev_names=['c', 'h'],
                                      paddings=[qc_0, qh_0],
                                      reverse=reverse)

                # the length is set only after the block was built
                qx.length = sequence_len
                for e in qx:
                    e.fprop()
                # the mask is assigned and propagated even without with_mask
                qmask.assign_npa(context, mask)
                qmask.fprop()
                qh_0.fprop()
                qc_0.fprop()
                qW.fprop()
                qR.fprop()
                lstm.fprop()
                q_h = lstm.h.to_host()

                # theano side, fed with the first `sequence_len` steps only
                th_x = T.ftensor3()
                lstm_layer = LstmLayer(W, R, c_0, h_0, reverse)
                if with_mask:
                    th_mask = T.fmatrix()
                    get_th_h = theano.function([th_x, th_mask],
                                               lstm_layer.get_output_expr(
                                                   th_x, th_mask))
                    th_h = get_th_h(np.dstack(x[:sequence_len]),
                                    mask[:, :sequence_len])
                else:
                    get_th_h = theano.function(
                        [th_x], lstm_layer.get_output_expr(th_x))
                    th_h = get_th_h(np.dstack(x[:sequence_len]))

                # for/else: record True only if no entry differed
                # (this inner `i` rebinds the outer loop variable)
                for i in xrange(th_h.shape[0]):
                    if not np.allclose(q_h[i], th_h[i]):
                        r.append(False)
                        break
                else:
                    r.append(True)

    # every recorded comparison must be True
    self.assertEqual(sum(r), len(r))
def __init__(self, R, b, grad_clipping, mask, prev_c, prev_h, device_id=None):
    """
    Register all operands and allocate the working buffers of the block.

    ``zifo`` is one ``batch_size x 4*dim`` buffer; ``z``, ``i``, ``f`` and
    ``o`` are its four consecutive ``dim``-wide column slices. The derivative
    buffers exist only when ``self.learning`` is set.

    :param R: operand whose ``nrows`` defines ``dim``
    :param b: operand registered like ``R``
    :param grad_clipping: stored unchanged
    :param mask: optional operand; registered only when truthy
    :param prev_c: operand whose ``nrows`` defines the batch size
    :param prev_h: operand registered like ``prev_c``
    :param device_id: device passed to ``Context``
    """
    self.f_context = Context(device_id)
    device_id = self.f_context.device_id

    if not R.bpropagable:
        self.R = R.register_usage(device_id)
    else:
        self.R, self.dL_dR = R.register_usage(device_id, device_id)
        self.R_b_context = Context(device_id)

    if not b.bpropagable:
        self.b = b.register_usage(device_id)
    else:
        self.b, self.dL_db = b.register_usage(device_id, device_id)
        self.b_b_context = Context(device_id)

    self.grad_clipping = grad_clipping
    if mask:
        self.mask = mask.register_usage(device_id)

    if not prev_c.bpropagable:
        self.prev_c = prev_c.register_usage(device_id)
    else:
        self.prev_c, self.dL_dprev_c = prev_c.register_usage(device_id, device_id)

    if not prev_h.bpropagable:
        self.prev_h = prev_h.register_usage(device_id)
    else:
        self.prev_h, self.dL_dprev_h = prev_h.register_usage(device_id, device_id)

    # note: `b.bpropagable` does not take part in this flag
    self.learning = R.bpropagable or prev_c.bpropagable or prev_h.bpropagable
    if self.learning:
        self.b_context = Context(device_id)
    bu_device_id = device_id if self.learning else None

    dim = self.R.nrows
    batch_size = self.prev_c.nrows

    def quarters(buffer):
        # four consecutive, equally wide column slices of `buffer`
        return [buffer[:, k * dim:(k + 1) * dim] for k in xrange(4)]

    self.zifo = Matrix.empty(batch_size, 4 * dim, device_id=device_id)
    self.z, self.i, self.f, self.o = quarters(self.zifo)
    self.c = Connector(Matrix.empty_like(self.prev_c, device_id), bu_device_id)
    self.tanh_c = Matrix.empty_like(self.c, device_id)
    self.h = Connector(Matrix.empty_like(self.c, device_id), bu_device_id)

    if self.learning:
        self._dzifo_dpre_zifo = Matrix.empty_like(self.zifo)
        (self.dz_dpre_z, self.di_dpre_i,
         self.df_dpre_f, self.do_dpre_o) = quarters(self._dzifo_dpre_zifo)
        # the dL_dpre_* attributes are aliases of the buffers above,
        # not separate allocations
        self.dL_dpre_zifo = self._dzifo_dpre_zifo
        self.dL_dpre_z = self.dz_dpre_z
        self.dL_dpre_i = self.di_dpre_i
        self.dL_dpre_f = self.df_dpre_f
        self.dL_dpre_o = self.do_dpre_o
        self._dtanh_c_dc = Matrix.empty_like(self.c)
def test_theano_grad(self):
    """
    Compare the gradients of the quagga model (processor type 'gpu') with
    the gradients theano derives for an equivalent model.

    Runs `self.N` randomly sized problems, each for every combination of
    `reverse`, `with_mask` and `learn_inital_states`. Parameter gradients
    are compared one by one with `np.allclose(atol=1e-6)`; the gradient with
    respect to the input is compared step by step and recorded as a single
    boolean. All recorded results have to be True.
    """
    quagga.processor_type = 'gpu'
    r = []
    for i in xrange(self.N):
        # the order of the rng draws below determines the generated data
        max_input_sequence_len = self.rng.random_integers(300)
        # first iteration uses the full length, later ones a random length
        sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
            max_input_sequence_len)
        batch_size = self.rng.random_integers(128)
        input_dim, hidden_dim, class_num = self.rng.random_integers(1500,
                                                                    size=3)
        x = [
            self.rng.randn(batch_size, input_dim).astype(np.float32)
            for _ in xrange(max_input_sequence_len)
        ]
        true_labels = [
            self.rng.randint(class_num,
                             size=(batch_size, 1)).astype(np.int32)
            for _ in xrange(max_input_sequence_len)
        ]
        mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
            np.float32)
        h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
        c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
        # four input and four recurrent weight blocks, stacked side by side
        W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
        W = np.hstack((W_z, W_i, W_f, W_o))
        R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
        R = np.hstack((R_z, R_i, R_f, R_o))
        lr_W = self.get_orthogonal_matrix(hidden_dim, class_num)
        lr_b = self.rng.rand(1, class_num).astype(dtype=np.float32)
        device_id = 0

        for reverse in [False, True]:
            for with_mask in [False, True]:
                for learn_inital_states in [False, True]:
                    # quagga model
                    context = Context()
                    qx = List([
                        Connector(Matrix.from_npa(e), device_id) for e in x
                    ])
                    qtrue_labels = List([
                        Connector(Matrix.from_npa(e)) for e in true_labels
                    ], qx.length)
                    qmask = Matrix.empty(batch_size, qx.length, 'float')
                    # column connectors are taken from the raw matrix before
                    # `qmask` itself is wrapped in a Connector
                    qmask_list = [
                        Connector(qmask[:, i]) for i in xrange(qmask.ncols)
                    ]
                    qmask = Connector(qmask)
                    # initial states get a device id only when learned
                    qh_0 = Connector(
                        Matrix.from_npa(h_0),
                        device_id if learn_inital_states else None)
                    qc_0 = Connector(
                        Matrix.from_npa(c_0),
                        device_id if learn_inital_states else None)
                    qW = Connector(Matrix.from_npa(W), device_id)
                    qR = Connector(Matrix.from_npa(R), device_id)
                    qlr_W = Connector(Matrix.from_npa(lr_W), device_id)
                    qlr_b = Connector(Matrix.from_npa(lr_b), device_id)
                    lstm = SequencerBlock(
                        block_class=LstmBlock,
                        params=[qW, qR],
                        sequences=[
                            qx,
                            qmask_list if with_mask else [None] * len(qx)
                        ],
                        output_names=['h'],
                        prev_names=['c', 'h'],
                        paddings=[qc_0, qh_0],
                        reverse=reverse)
                    seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                   params=[qlr_W, qlr_b],
                                                   sequences=[lstm.h],
                                                   output_names=['output'])
                    seq_sce_block = SequencerBlock(
                        block_class=SoftmaxCeBlock,
                        params=[],
                        sequences=[
                            seq_dot_block.output, qtrue_labels,
                            qmask_list if with_mask else [None] * len(qx)
                        ])
                    # the length is set only after the blocks were built
                    qx.length = sequence_len
                    for e in qx:
                        e.fprop()
                    for e in qtrue_labels:
                        e.fprop()
                    # the mask is assigned and propagated even without with_mask
                    qmask.assign_npa(context, mask)
                    qmask.fprop()
                    qlr_W.fprop()
                    qlr_b.fprop()
                    qh_0.fprop()
                    qc_0.fprop()
                    qW.fprop()
                    qR.fprop()
                    lstm.fprop()
                    seq_dot_block.fprop()
                    seq_sce_block.fprop()
                    # backward pass in reverse block order
                    seq_sce_block.bprop()
                    seq_dot_block.bprop()
                    lstm.bprop()
                    # same order as `wrt` below; the input gradients go last
                    quagga_grads = [
                        qlr_b.backward_matrix.to_host(),
                        qlr_W.backward_matrix.to_host(),
                        qW.backward_matrix.to_host(),
                        qR.backward_matrix.to_host()
                    ]
                    if learn_inital_states:
                        quagga_grads.append(qc_0.backward_matrix.to_host())
                        quagga_grads.append(qh_0.backward_matrix.to_host())
                    quagga_grads.append(
                        [e.backward_matrix.to_host() for e in qx])
                    # drop the references to the quagga objects before the
                    # theano model is built
                    del qx
                    del qlr_b
                    del qlr_W
                    del qW
                    del qR
                    del qmask
                    del lstm
                    del seq_dot_block
                    del seq_sce_block

                    # theano model
                    th_x = T.ftensor3()
                    th_true_labels = T.imatrix()
                    th_mask = T.fmatrix()
                    lstm_layer = LstmLayer(W, R, c_0, h_0, reverse=reverse)
                    th_h = lstm_layer.get_output_expr(
                        th_x, th_mask if with_mask else None)
                    seq_softmax_layer = SequentialSoftmaxLayer(
                        lr_W, lr_b, reverse)
                    loss = seq_softmax_layer.get_loss(
                        th_h, th_true_labels,
                        th_mask if with_mask else None)
                    wrt = [
                        seq_softmax_layer.b, seq_softmax_layer.W,
                        lstm_layer.W, lstm_layer.R
                    ]
                    if learn_inital_states:
                        wrt.append(lstm_layer.c0)
                        wrt.append(lstm_layer.h0)
                    wrt.append(th_x)
                    grads = T.grad(loss, wrt)
                    # theano is fed with the first `sequence_len` steps only
                    if with_mask:
                        get_theano_grads = theano.function(
                            [th_x, th_true_labels, th_mask], grads)
                        theano_grads = get_theano_grads(
                            np.dstack(x[:sequence_len]),
                            np.hstack(true_labels[:sequence_len]),
                            mask[:, :sequence_len])
                    else:
                        get_theano_grads = theano.function(
                            [th_x, th_true_labels], grads)
                        theano_grads = get_theano_grads(
                            np.dstack(x[:sequence_len]),
                            np.hstack(true_labels[:sequence_len]))

                    # all gradients except the last one: one result each
                    for quagga_grad, theano_grad in izip(
                            quagga_grads[:-1], theano_grads[:-1]):
                        r.append(
                            np.allclose(quagga_grad, theano_grad,
                                        atol=1e-6))
                    # input gradient: compared along the last theano axis;
                    # for/else records True only if no step differed
                    # (this inner `i` rebinds the outer loop variable)
                    for i in xrange(theano_grads[-1].shape[-1]):
                        if not np.allclose(quagga_grads[-1][i],
                                           theano_grads[-1][..., i],
                                           atol=1e-6):
                            r.append(False)
                            break
                    else:
                        r.append(True)

    # every recorded comparison must be True
    self.assertEqual(sum(r), len(r))
def test_fprop(self):
    """
    Check that the forward pass of an LSTM `SequencerBlock` yields the
    same hidden states on the gpu and cpu backends.

    For each of `self.N` random trials the same inputs, initial states
    and weights are fed to both backends, for every combination of
    `reverse` and `with_mask`. A configuration passes when every pair of
    per-step outputs agrees under `np.allclose(rtol=1e-7, atol=1e-3)`.
    """
    outcomes = []
    configurations = [(False, False), (False, True),
                      (True, False), (True, True)]
    for trial in xrange(self.N):
        max_seq_len = self.rng.random_integers(500)
        # the first trial always uses the full sequence length
        if trial == 0:
            seq_len = max_seq_len
        else:
            seq_len = self.rng.random_integers(max_seq_len)
        batch_size = self.rng.random_integers(256)
        input_dim, hidden_dim = self.rng.random_integers(1500, size=2)

        x = [self.rng.randn(batch_size, input_dim).astype(np.float32)
             for _ in xrange(max_seq_len)]
        mask = self.rng.rand(batch_size, seq_len) < 0.8
        mask = mask.astype(np.float32)
        h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
        c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)

        # four horizontally stacked blocks (z, i, f, o in the original
        # naming) for the input and for the recurrent weights
        W = np.hstack([self.get_orthogonal_matrix(input_dim, hidden_dim)
                       for _ in xrange(4)])
        R = np.hstack([self.get_orthogonal_matrix(hidden_dim, hidden_dim)
                       for _ in xrange(4)])

        for reverse, with_mask in configurations:
            hidden = {}
            for backend in ['gpu', 'cpu']:
                quagga.processor_type = backend
                context = Context()
                qx = List([Connector(Matrix.from_npa(step)) for step in x])
                n_steps = len(qx)
                qmask = Matrix.empty(batch_size, n_steps, 'float')
                qh_0 = Connector(Matrix.from_npa(h_0))
                qc_0 = Connector(Matrix.from_npa(c_0))
                qW = Connector(Matrix.from_npa(W))
                qR = Connector(Matrix.from_npa(R))

                if with_mask:
                    # one connector per column of the mask matrix
                    mask_seq = List([Connector(qmask[:, t])
                                     for t in xrange(n_steps)], n_steps)
                    qmask.assign_npa(context, mask)
                else:
                    mask_seq = [None] * n_steps

                lstm = SequencerBlock(block_class=LstmBlock,
                                      params=[qW, qR],
                                      sequences=[qx, mask_seq],
                                      output_names=['h'],
                                      prev_names=['c', 'h'],
                                      paddings=[qc_0, qh_0],
                                      reverse=reverse)

                qx.length = seq_len
                if with_mask:
                    mask_seq.fprop()
                for node in (qx, qh_0, qc_0, qW, qR):
                    node.fprop()
                lstm.fprop()
                hidden[backend] = lstm.h.to_host()

            # stops at the first mismatching step, like the original
            # for/break/else
            outcomes.append(all(
                np.allclose(h_gpu, h_cpu, rtol=1e-7, atol=1e-3)
                for h_gpu, h_cpu in izip(hidden['gpu'], hidden['cpu'])))

    self.assertEqual(sum(outcomes), len(outcomes))