def register_usage(self, fu_device_id, bo_device_id=None):
    """
    Register usage of connector's forward_matrix.

    :param fu_device_id: context in which `forward_matrix` will be used
    :param bo_device_id: context in which `backward_matrix` of the
        connector will be calculated
    """
    if not self.bpropagable and bo_device_id:
        raise ValueError(
            "Nobody is going to use computation from backward step. "
            "You mustn't register for backward propagate!")
    # Lazily create a forward replica (plus its context) for a device other
    # than the connector's own forward device.
    if fu_device_id not in self._f_matrices and fu_device_id != self._fo_device_id:
        self._f_matrices[fu_device_id] = Matrix.empty_like(self, fu_device_id)
        self.context[fu_device_id] = Context(fu_device_id)
    # Forward-only registration: no backward buffers required.
    if bo_device_id is None:
        return self._f_matrices[fu_device_id]
    # Ensure backward buffers and contexts exist on both the backward-usage
    # device and the device that will produce the gradient.
    for device_id in (self._bu_device_id, bo_device_id):
        if device_id not in self._b_matrices:
            self._b_matrices[device_id] = Matrix.empty_like(self, device_id)
        if device_id not in self.context:
            self.context[device_id] = Context(device_id)
    # Cross-device gradients need a staging buffer on the usage device.
    if self._bu_device_id != bo_device_id and \
            self._bu_device_id not in self._b_matrices_pool:
        self._b_matrices_pool[self._bu_device_id] = Matrix.empty_like(
            self, self._bu_device_id)
    return self._f_matrices[fu_device_id], self._b_matrices[bo_device_id]
def __init__(self, x, nonlinearity, device_id=None):
    """
    Elementwise nonlinearity block applied to connector ``x``.

    :param x: input connector; if ``x.bpropagable`` the block also
        registers for gradients and allocates a derivative buffer
    :param nonlinearity: one of ``"sigmoid"``, ``"tanh"``, ``"relu"``
    :param device_id: device/context to run on (default chosen by Context)
    :raises ValueError: for ``"softmax"`` (use SoftmaxBlock) or any
        unrecognized nonlinearity name
    """
    self.f_context = Context(device_id)
    device_id = self.f_context.device_id
    self.learning = x.bpropagable
    if self.learning:
        self.b_context = Context(device_id)
        self.x, self.dL_dx = x.register_usage(device_id, device_id)
        # Buffer for the derivative of f w.r.t. its pre-activation.
        self._df_dpref = Matrix.empty_like(self.x, device_id)
    else:
        self.x = x.register_usage(device_id)
    output = Matrix.empty_like(x, device_id)
    self.output = Connector(output, device_id if self.learning else None)
    if nonlinearity == "sigmoid":
        self.f = self.x.sigmoid
    elif nonlinearity == "tanh":
        self.f = self.x.tanh
    elif nonlinearity == "relu":
        self.f = self.x.relu
    elif nonlinearity == "softmax":
        raise ValueError("For softmax nonlinearity use SoftmaxBlock!")
    else:
        # Fixed placeholder message ("TODO!") with an informative error.
        raise ValueError("Unknown nonlinearity: {!r}. Supported values "
                         "are 'sigmoid', 'tanh' and 'relu'.".format(nonlinearity))
    self.training_mode = True
def register_usage(self, fu_device_id, bo_device_id=None):
    """
    Register usage of connector's forward_matrix.

    :param fu_device_id: context in which `forward_matrix` will be used
    :param bo_device_id: context in which `backward_matrix` of the
        connector will be calculated
    """
    if bo_device_id and not self.bpropagable:
        raise ValueError("Nobody is going to use computation from backward step. "
                         "You mustn't register for backward propagate!")
    # A foreign forward device gets its own matrix replica and context.
    needs_replica = (fu_device_id != self._fo_device_id and
                     fu_device_id not in self._f_matrices)
    if needs_replica:
        self._f_matrices[fu_device_id] = Matrix.empty_like(self, fu_device_id)
        self.context[fu_device_id] = Context(fu_device_id)
    if bo_device_id is None:
        # Forward-only usage.
        return self._f_matrices[fu_device_id]
    # Backward buffers/contexts for both the usage and the producing device.
    for dev in [self._bu_device_id, bo_device_id]:
        if dev not in self._b_matrices:
            self._b_matrices[dev] = Matrix.empty_like(self, dev)
        if dev not in self.context:
            self.context[dev] = Context(dev)
    # Staging buffer for accumulating gradients coming from another device.
    if self._bu_device_id != bo_device_id and \
            self._bu_device_id not in self._b_matrices_pool:
        self._b_matrices_pool[self._bu_device_id] = \
            Matrix.empty_like(self, self._bu_device_id)
    return self._f_matrices[fu_device_id], self._b_matrices[bo_device_id]
def __init__(self, R, b, grad_clipping, mask, prev_c, prev_h, device_id=None):
    """
    Set up one LSTM-style cell step.

    :param R: recurrent weight connector (``nrows`` gives the hidden dim)
    :param b: bias connector
    :param grad_clipping: clipping threshold stored for the backward pass
    :param mask: optional forward-only mask connector
    :param prev_c: previous cell-state connector
    :param prev_h: previous hidden-state connector
    :param device_id: device/context to run on
    """
    self.f_context = Context(device_id)
    device_id = self.f_context.device_id
    if R.bpropagable:
        # Register for forward use and gradient accumulation.
        self.R, self.dL_dR = R.register_usage(device_id, device_id)
        self.R_b_context = Context(device_id)
    else:
        self.R = R.register_usage(device_id)
    if b.bpropagable:
        self.b, self.dL_db = b.register_usage(device_id, device_id)
        self.b_b_context = Context(device_id)
    else:
        self.b = b.register_usage(device_id)
    self.grad_clipping = grad_clipping
    if mask:
        self.mask = mask.register_usage(device_id)
    if prev_c.bpropagable:
        self.prev_c, self.dL_dprev_c = prev_c.register_usage(device_id, device_id)
    else:
        self.prev_c = prev_c.register_usage(device_id)
    if prev_h.bpropagable:
        self.prev_h, self.dL_dprev_h = prev_h.register_usage(device_id, device_id)
    else:
        self.prev_h = prev_h.register_usage(device_id)
    # NOTE(review): b.bpropagable does not participate in `learning` even
    # though dL_db may have been registered above — confirm this is intended.
    self.learning = R.bpropagable or prev_c.bpropagable or prev_h.bpropagable
    if self.learning:
        self.b_context = Context(device_id)
    dim = self.R.nrows
    batch_size = self.prev_c.nrows
    # Fused pre-activation buffer; column bands are z, i, f, o
    # (presumably block input / input / forget / output gates).
    self.zifo = Matrix.empty(batch_size, 4 * dim, device_id=device_id)
    self.z = self.zifo[:, 0*dim:1*dim]
    self.i = self.zifo[:, 1*dim:2*dim]
    self.f = self.zifo[:, 2*dim:3*dim]
    self.o = self.zifo[:, 3*dim:4*dim]
    # New cell state; wrapped in a Connector (bpropagable only if learning).
    self.c = Matrix.empty_like(self.prev_c, device_id)
    self.c = Connector(self.c, device_id if self.learning else None)
    self.tanh_c = Matrix.empty_like(self.c, device_id)
    # New hidden state, also exposed as a Connector.
    self.h = Matrix.empty_like(self.c, device_id)
    self.h = Connector(self.h, device_id if self.learning else None)
    if self.learning:
        # Derivative buffers mirror the fused zifo layout; the dL_dpre_*
        # names alias the same memory as the d*_dpre_* slices.
        self._dzifo_dpre_zifo = Matrix.empty_like(self.zifo)
        self.dz_dpre_z = self._dzifo_dpre_zifo[:, 0*dim:1*dim]
        self.di_dpre_i = self._dzifo_dpre_zifo[:, 1*dim:2*dim]
        self.df_dpre_f = self._dzifo_dpre_zifo[:, 2*dim:3*dim]
        self.do_dpre_o = self._dzifo_dpre_zifo[:, 3*dim:4*dim]
        self.dL_dpre_zifo = self._dzifo_dpre_zifo
        self.dL_dpre_z = self.dz_dpre_z
        self.dL_dpre_i = self.di_dpre_i
        self.dL_dpre_f = self.df_dpre_f
        self.dL_dpre_o = self.do_dpre_o
        self._dtanh_c_dc = Matrix.empty_like(self.c)
def bprop(self):
    """
    Return the accumulated gradient for this connector.

    Dense gradients registered on other devices are summed into the buffer
    on ``self._bu_device_id`` via a staging pool; a sparse gradient, if
    present, is added last.

    :raises ValueError: if the connector is not bpropagable
    """
    if not self.bpropagable:
        raise ValueError(
            'Nobody was going to use computation from backward '
            'step. You should not backward propagate!')
    if not self._b_matrices and not self._b_sparse_matrix:
        # When no one registered for providing derivatives zero dense
        # matrix will be returned
        bwd = Matrix.empty_like(self, self._bu_device_id)
        if self._bu_device_id not in self.context:
            self.context[self._bu_device_id] = Context(self._bu_device_id)
        bwd.fill(self.context[self._bu_device_id], 0.0)
        # Cache so subsequent calls reuse the same zero buffer.
        self._b_matrices[self._bu_device_id] = bwd
        return bwd
    if not self._b_matrices and self._b_sparse_matrix:
        # Only a sparse gradient exists; return it without densifying.
        return self._b_sparse_matrix
    for bo_device_id, bwd_matrix in self._b_matrices.iteritems():
        if self._bu_device_id != bo_device_id:
            # Copy the foreign-device gradient into the staging buffer on
            # the usage device, then accumulate into the main buffer.
            self._b_matrices_pool[self._bu_device_id].assign(
                self.context[self._bu_device_id], bwd_matrix)
            self._b_matrices[self._bu_device_id].add(
                self.context[self._bu_device_id],
                self._b_matrices_pool[self._bu_device_id])
    if self._b_sparse_matrix:
        self._b_matrices[self._bu_device_id].add(
            self.context[self._bu_device_id], self._b_sparse_matrix)
    return self._b_matrices[self._bu_device_id]
def __init__(self, parameters, learning_rate_policy, momentum_policy,
             ema_decay=0.9, epsilon=1e-6):
    """
    Optimizer state: a zero-initialized squared-gradient EMA buffer and a
    velocity buffer per parameter, plus one context per parameter device.
    """
    self.parameters = parameters
    self.learning_rate_policy = learning_rate_policy
    self.momentum_policy = momentum_policy
    self.ema_decay = ema_decay
    self.epsilon = epsilon
    self.grad_sqr = []
    self.velocity = []
    for param in self.parameters:
        # grad_sqr is filled before velocity for each parameter,
        # matching the buffer/parameter pairing by index.
        for store in (self.grad_sqr, self.velocity):
            buf = Matrix.empty_like(param)
            buf.sync_fill(0.0)
            store.append(buf)
    self.contexts = [Context(param.device_id) for param in parameters]
    self.blocking_contexts = []
def __init__(self, probs, true_labels, schedule, seed, device_id=None):
    """
    Block that consumes ``probs`` and ``true_labels`` forward-only and
    produces an output shaped like ``true_labels`` (presumably sampling
    between the two according to ``schedule`` — confirm against usage).
    """
    self.context = Context(device_id)
    device_id = self.context.device_id
    self.schedule = schedule
    # Seeded legacy NumPy generator for reproducible draws.
    self.rnd = np.random.RandomState(seed)
    # Forward-only registrations: no gradients flow through this block.
    self.probs = probs.register_usage(device_id)
    self.true_labels = true_labels.register_usage(device_id)
    self.output = Connector(Matrix.empty_like(self.true_labels))
def __init__(self, parameters, learning_rate_policy, beta1=0.9, beta2=0.999,
             epsilon=1e-8):
    """
    Adam-style optimizer state: zero-initialized first (``m``) and second
    (``v``) moment buffers and one context per parameter.
    """
    self.parameters = parameters
    self.m = []
    self.v = []
    self.contexts = []
    for param in self.parameters:
        first_moment = Matrix.empty_like(param)
        first_moment.sync_fill(0.0)
        second_moment = Matrix.empty_like(param)
        second_moment.sync_fill(0.0)
        self.m.append(first_moment)
        self.v.append(second_moment)
        self.contexts.append(Context(param.device_id))
    self.learning_rate_policy = learning_rate_policy
    self.beta1 = beta1
    self.beta2 = beta2
    self.epsilon = epsilon
    self.blocking_contexts = []
    # Step counter used for bias correction (starts at zero).
    self.iteration = 0
def __init__(self, x, device_id=None):
    """
    Block wrapping a single input connector ``x``.

    Registers ``x`` for forward use (and gradient accumulation when
    ``x.bpropagable``) and allocates an output buffer of the same shape.

    :param x: input connector
    :param device_id: device/context to run on
    """
    self.context = Context(device_id)
    device_id = self.context.device_id
    self.learning = x.bpropagable
    if self.learning:
        self.x, self.dL_dx = x.register_usage(device_id, device_id)
    else:
        self.x = x.register_usage(device_id)
    # BUG FIX: removed a duplicated unconditional
    # `self.x = x.register_usage(device_id)` that followed the branch
    # above — it re-registered forward-only usage and made the
    # learning/non-learning split pointless for self.x.
    self.output = Connector(Matrix.empty_like(self.x),
                            device_id if self.learning else None)
def __init__(self, parameters, learning_rate_policy, momentum_policy):
    """
    SGD-with-momentum state: a zero-initialized velocity buffer per
    parameter and one context per parameter device.
    """
    self.parameters = parameters
    self.learning_rate_policy = learning_rate_policy
    self.momentum_policy = momentum_policy
    self.velocity = []
    for param in self.parameters:
        buf = Matrix.empty_like(param)
        buf.sync_fill(0.0)
        self.velocity.append(buf)
    self.contexts = [Context(param.device_id) for param in parameters]
    self.blocking_contexts = []
def __init__(self, x):
    """
    Block over a container of connectors ``x``; device and bpropagability
    are taken from the first element.
    """
    first = x[0]
    device_id = first.device_id
    learning = first.bpropagable
    self.context = Context(device_id)
    self.output = Connector(Matrix.empty_like(first),
                            device_id if learning else None)
    if learning:
        self.x, self.dL_dx = izip(*x.register_usage(device_id, device_id))
    else:
        self.x = x.register_usage(device_id)
    # Index of the final element in the sequence.
    self.last_idx = x.length - 1
def __init__(self, matrices, device_id=None):
    """
    Block over a container of connectors; output buffer mirrors the first
    element's shape on the chosen device.
    """
    self.context = Context(device_id)
    device_id = self.context.device_id
    learning = matrices[0].bpropagable
    out = Matrix.empty_like(matrices[0], device_id)
    self.output = Connector(out, device_id if learning else None)
    if learning:
        registered = matrices.register_usage(device_id, device_id)
        self.matrices, self.dL_dmatrices = izip(*registered)
    else:
        self.matrices = matrices.register_usage(device_id)
    self.length = matrices.length
def __init__(self, x, true_labels, mask=None, device_id=None):
    """
    Loss-style block: registers ``x`` (with gradient when bpropagable),
    ``true_labels`` and an optional ``mask`` forward-only, and allocates a
    probability buffer shaped like ``x``.
    """
    self.context = Context(device_id)
    device_id = self.context.device_id
    if not x.bpropagable:
        self.x = x.register_usage(device_id)
    else:
        self.x, self.dL_dx = x.register_usage(device_id, device_id)
    self.true_labels = true_labels.register_usage(device_id)
    if mask:
        self.mask = mask.register_usage(device_id)
    self.probs = Connector(Matrix.empty_like(self.x))
    # Computed lazily elsewhere.
    self.loss = None
def __init__(self, matrices, device_id=None):
    """
    Block over a container of connectors; the output matrix is shaped like
    the first element and wrapped in a Connector.
    """
    self.context = Context(device_id)
    device_id = self.context.device_id
    buf = Matrix.empty_like(matrices[0], device_id)
    learning = matrices[0].bpropagable
    self.output = Connector(buf, device_id if learning else None)
    if not learning:
        self.matrices = matrices.register_usage(device_id)
    else:
        self.matrices, self.dL_dmatrices = izip(
            *matrices.register_usage(device_id, device_id))
    self.length = matrices.length
def __init__(self, dropout_prob, x, seed=42, device_id=None):
    """
    Dropout block over connector ``x`` with drop probability
    ``dropout_prob``; uses a seeded random generator for the mask.
    """
    self.dropout_prob = dropout_prob
    self.f_context = Context(device_id)
    device_id = self.f_context.device_id
    self.generator = Matrix.get_random_generator(seed)
    learning = x.bpropagable
    if learning:
        self.b_context = Context(device_id)
        self.x, self.dL_dx = x.register_usage(device_id, device_id)
    else:
        self.x = x.register_usage(device_id)
    self.output = Connector(Matrix.empty_like(self.x),
                            device_id if learning else None)
    # Dropout is active until training_mode is switched off.
    self.training_mode = True
def __init__(self, matrices, u, mask=None, device_id=None):
    """
    Scoring block over a container of connectors with a vector ``u``
    (appears to be an attention mechanism — confirm against fprop).
    """
    self.context = Context(device_id)
    device_id = self.context.device_id
    first = matrices[0]
    learning = first.bpropagable or u.bpropagable
    self.output = Connector(Matrix.empty_like(first, device_id),
                            device_id if learning else None)
    if first.bpropagable:
        self.matrices, self.dL_dmatrices = \
            izip(*matrices.register_usage(device_id, device_id))
    else:
        self.matrices = matrices.register_usage(device_id)
    self.length = matrices.length
    if u.bpropagable:
        self.u, self.dL_du = u.register_usage(device_id, device_id)
    else:
        self.u = u.register_usage(device_id)
    if mask:
        self.mask = mask.register_usage(device_id)
    # One weight per element: (batch_size, length), plus its gradient buffer.
    self.a = Matrix.empty(first.nrows, matrices.length, 'float', device_id)
    self.dL_dpre_a = Matrix.empty_like(self.a)
    # Column views into `a`, one per registered matrix.
    self.a_cols = [self.a[:, col] for col in xrange(len(self.matrices))]
def bprop(self):
    """
    Return the accumulated gradient matrix for this connector.

    Sums dense per-device gradients into the buffer on
    ``self._bu_device_id`` (staging cross-device values through the pool
    buffer) and finally adds the sparse gradient if one exists.

    :raises ValueError: if the connector is not bpropagable
    """
    if not self.bpropagable:
        raise ValueError('Nobody was going to use computation from backward '
                         'step. You should not backward propagate!')
    if not self._b_matrices and not self._b_sparse_matrix:
        # When no one registered for providing derivatives zero dense
        # matrix will be returned
        bwd = Matrix.empty_like(self, self._bu_device_id)
        if self._bu_device_id not in self.context:
            self.context[self._bu_device_id] = Context(self._bu_device_id)
        bwd.fill(self.context[self._bu_device_id], 0.0)
        # Cached: later calls return this same zero buffer.
        self._b_matrices[self._bu_device_id] = bwd
        return bwd
    if not self._b_matrices and self._b_sparse_matrix:
        # Sparse-only gradient is returned without densifying.
        return self._b_sparse_matrix
    for bo_device_id, bwd_matrix in self._b_matrices.iteritems():
        if self._bu_device_id != bo_device_id:
            # Stage foreign-device gradient, then accumulate on the
            # backward-usage device.
            self._b_matrices_pool[self._bu_device_id].assign(self.context[self._bu_device_id], bwd_matrix)
            self._b_matrices[self._bu_device_id].add(self.context[self._bu_device_id], self._b_matrices_pool[self._bu_device_id])
    if self._b_sparse_matrix:
        self._b_matrices[self._bu_device_id].add(self.context[self._bu_device_id], self._b_sparse_matrix)
    return self._b_matrices[self._bu_device_id]
def __init__(self, R, b, grad_clipping, mask, prev_c, prev_h, device_id=None):
    """
    Set up one LSTM-style cell step (recurrent weights ``R``, bias ``b``,
    previous cell state ``prev_c``, previous hidden state ``prev_h``).

    Inputs flagged ``bpropagable`` are registered for gradients as well as
    forward use; ``mask`` is forward-only.
    """
    self.f_context = Context(device_id)
    device_id = self.f_context.device_id
    if R.bpropagable:
        self.R, self.dL_dR = R.register_usage(device_id, device_id)
        self.R_b_context = Context(device_id)
    else:
        self.R = R.register_usage(device_id)
    if b.bpropagable:
        self.b, self.dL_db = b.register_usage(device_id, device_id)
        self.b_b_context = Context(device_id)
    else:
        self.b = b.register_usage(device_id)
    self.grad_clipping = grad_clipping
    if mask:
        self.mask = mask.register_usage(device_id)
    if prev_c.bpropagable:
        self.prev_c, self.dL_dprev_c = prev_c.register_usage(
            device_id, device_id)
    else:
        self.prev_c = prev_c.register_usage(device_id)
    if prev_h.bpropagable:
        self.prev_h, self.dL_dprev_h = prev_h.register_usage(
            device_id, device_id)
    else:
        self.prev_h = prev_h.register_usage(device_id)
    # NOTE(review): b.bpropagable is not part of this condition — confirm.
    self.learning = R.bpropagable or prev_c.bpropagable or prev_h.bpropagable
    if self.learning:
        self.b_context = Context(device_id)
    dim = self.R.nrows
    batch_size = self.prev_c.nrows
    # Fused pre-activation buffer; bands are z, i, f, o
    # (presumably block input and input/forget/output gates).
    self.zifo = Matrix.empty(batch_size, 4 * dim, device_id=device_id)
    self.z = self.zifo[:, 0 * dim:1 * dim]
    self.i = self.zifo[:, 1 * dim:2 * dim]
    self.f = self.zifo[:, 2 * dim:3 * dim]
    self.o = self.zifo[:, 3 * dim:4 * dim]
    # Cell and hidden state buffers, exposed through Connectors.
    self.c = Matrix.empty_like(self.prev_c, device_id)
    self.c = Connector(self.c, device_id if self.learning else None)
    self.tanh_c = Matrix.empty_like(self.c, device_id)
    self.h = Matrix.empty_like(self.c, device_id)
    self.h = Connector(self.h, device_id if self.learning else None)
    if self.learning:
        # Derivative buffer mirrors zifo's fused layout; dL_dpre_* names
        # alias the same slices in place.
        self._dzifo_dpre_zifo = Matrix.empty_like(self.zifo)
        self.dz_dpre_z = self._dzifo_dpre_zifo[:, 0 * dim:1 * dim]
        self.di_dpre_i = self._dzifo_dpre_zifo[:, 1 * dim:2 * dim]
        self.df_dpre_f = self._dzifo_dpre_zifo[:, 2 * dim:3 * dim]
        self.do_dpre_o = self._dzifo_dpre_zifo[:, 3 * dim:4 * dim]
        self.dL_dpre_zifo = self._dzifo_dpre_zifo
        self.dL_dpre_z = self.dz_dpre_z
        self.dL_dpre_i = self.di_dpre_i
        self.dL_dpre_f = self.df_dpre_f
        self.dL_dpre_o = self.do_dpre_o
        self._dtanh_c_dc = Matrix.empty_like(self.c)